1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
55 #include "tm-constrs.h"
59 #include "sched-int.h"
63 #include "diagnostic.h"
65 #include "tree-pass.h"
66 #include "tree-flow.h"
68 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Fallback used when the target headers do not define CHECK_STACK_LIMIT.
   NOTE(review): the matching #endif was absent in the extracted text; it is
   restored here since no other #endif for this guard is visible.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below have five entries per operation (QI, HI, SI, DI and "other"),
   so any mode not matched explicitly falls into the "other" slot, index 4.
   The final ": 4)" arm closes the conditional; without it the macro does not
   parse.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Stringop strategy that always punts to the library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
89 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
90 COSTS_N_BYTES (2), /* cost of an add instruction */
91 COSTS_N_BYTES (3), /* cost of a lea instruction */
92 COSTS_N_BYTES (2), /* variable shift costs */
93 COSTS_N_BYTES (3), /* constant shift costs */
94 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
95 COSTS_N_BYTES (3), /* HI */
96 COSTS_N_BYTES (3), /* SI */
97 COSTS_N_BYTES (3), /* DI */
98 COSTS_N_BYTES (5)}, /* other */
99 0, /* cost of multiply per each bit set */
100 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
101 COSTS_N_BYTES (3), /* HI */
102 COSTS_N_BYTES (3), /* SI */
103 COSTS_N_BYTES (3), /* DI */
104 COSTS_N_BYTES (5)}, /* other */
105 COSTS_N_BYTES (3), /* cost of movsx */
106 COSTS_N_BYTES (3), /* cost of movzx */
107 0, /* "large" insn */
109 2, /* cost for loading QImode using movzbl */
110 {2, 2, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 2, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {2, 2, 2}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {2, 2, 2}, /* cost of storing fp registers
118 in SFmode, DFmode and XFmode */
119 3, /* cost of moving MMX register */
120 {3, 3}, /* cost of loading MMX registers
121 in SImode and DImode */
122 {3, 3}, /* cost of storing MMX registers
123 in SImode and DImode */
124 3, /* cost of moving SSE register */
125 {3, 3, 3}, /* cost of loading SSE registers
126 in SImode, DImode and TImode */
127 {3, 3, 3}, /* cost of storing SSE registers
128 in SImode, DImode and TImode */
129 3, /* MMX or SSE register to integer */
130 0, /* size of l1 cache */
131 0, /* size of l2 cache */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
136 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
137 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
138 COSTS_N_BYTES (2), /* cost of FABS instruction. */
139 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
140 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
141 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
142 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
143 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
144 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
145 1, /* scalar_stmt_cost. */
146 1, /* scalar load_cost. */
147 1, /* scalar_store_cost. */
148 1, /* vec_stmt_cost. */
149 1, /* vec_to_scalar_cost. */
150 1, /* scalar_to_vec_cost. */
151 1, /* vec_align_load_cost. */
152 1, /* vec_unalign_load_cost. */
153 1, /* vec_store_cost. */
154 1, /* cond_taken_branch_cost. */
155 1, /* cond_not_taken_branch_cost. */
158 /* Processor costs (relative to an add) */
160 struct processor_costs i386_cost
= { /* 386 specific costs */
161 COSTS_N_INSNS (1), /* cost of an add instruction */
162 COSTS_N_INSNS (1), /* cost of a lea instruction */
163 COSTS_N_INSNS (3), /* variable shift costs */
164 COSTS_N_INSNS (2), /* constant shift costs */
165 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
166 COSTS_N_INSNS (6), /* HI */
167 COSTS_N_INSNS (6), /* SI */
168 COSTS_N_INSNS (6), /* DI */
169 COSTS_N_INSNS (6)}, /* other */
170 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
171 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
172 COSTS_N_INSNS (23), /* HI */
173 COSTS_N_INSNS (23), /* SI */
174 COSTS_N_INSNS (23), /* DI */
175 COSTS_N_INSNS (23)}, /* other */
176 COSTS_N_INSNS (3), /* cost of movsx */
177 COSTS_N_INSNS (2), /* cost of movzx */
178 15, /* "large" insn */
180 4, /* cost for loading QImode using movzbl */
181 {2, 4, 2}, /* cost of loading integer registers
182 in QImode, HImode and SImode.
183 Relative to reg-reg move (2). */
184 {2, 4, 2}, /* cost of storing integer registers */
185 2, /* cost of reg,reg fld/fst */
186 {8, 8, 8}, /* cost of loading fp registers
187 in SFmode, DFmode and XFmode */
188 {8, 8, 8}, /* cost of storing fp registers
189 in SFmode, DFmode and XFmode */
190 2, /* cost of moving MMX register */
191 {4, 8}, /* cost of loading MMX registers
192 in SImode and DImode */
193 {4, 8}, /* cost of storing MMX registers
194 in SImode and DImode */
195 2, /* cost of moving SSE register */
196 {4, 8, 16}, /* cost of loading SSE registers
197 in SImode, DImode and TImode */
198 {4, 8, 16}, /* cost of storing SSE registers
199 in SImode, DImode and TImode */
200 3, /* MMX or SSE register to integer */
201 0, /* size of l1 cache */
202 0, /* size of l2 cache */
203 0, /* size of prefetch block */
204 0, /* number of parallel prefetches */
206 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
207 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
208 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
209 COSTS_N_INSNS (22), /* cost of FABS instruction. */
210 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
211 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
212 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
213 DUMMY_STRINGOP_ALGS
},
214 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
215 DUMMY_STRINGOP_ALGS
},
216 1, /* scalar_stmt_cost. */
217 1, /* scalar load_cost. */
218 1, /* scalar_store_cost. */
219 1, /* vec_stmt_cost. */
220 1, /* vec_to_scalar_cost. */
221 1, /* scalar_to_vec_cost. */
222 1, /* vec_align_load_cost. */
223 2, /* vec_unalign_load_cost. */
224 1, /* vec_store_cost. */
225 3, /* cond_taken_branch_cost. */
226 1, /* cond_not_taken_branch_cost. */
230 struct processor_costs i486_cost
= { /* 486 specific costs */
231 COSTS_N_INSNS (1), /* cost of an add instruction */
232 COSTS_N_INSNS (1), /* cost of a lea instruction */
233 COSTS_N_INSNS (3), /* variable shift costs */
234 COSTS_N_INSNS (2), /* constant shift costs */
235 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
236 COSTS_N_INSNS (12), /* HI */
237 COSTS_N_INSNS (12), /* SI */
238 COSTS_N_INSNS (12), /* DI */
239 COSTS_N_INSNS (12)}, /* other */
240 1, /* cost of multiply per each bit set */
241 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
242 COSTS_N_INSNS (40), /* HI */
243 COSTS_N_INSNS (40), /* SI */
244 COSTS_N_INSNS (40), /* DI */
245 COSTS_N_INSNS (40)}, /* other */
246 COSTS_N_INSNS (3), /* cost of movsx */
247 COSTS_N_INSNS (2), /* cost of movzx */
248 15, /* "large" insn */
250 4, /* cost for loading QImode using movzbl */
251 {2, 4, 2}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 4, 2}, /* cost of storing integer registers */
255 2, /* cost of reg,reg fld/fst */
256 {8, 8, 8}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {8, 8, 8}, /* cost of storing fp registers
259 in SFmode, DFmode and XFmode */
260 2, /* cost of moving MMX register */
261 {4, 8}, /* cost of loading MMX registers
262 in SImode and DImode */
263 {4, 8}, /* cost of storing MMX registers
264 in SImode and DImode */
265 2, /* cost of moving SSE register */
266 {4, 8, 16}, /* cost of loading SSE registers
267 in SImode, DImode and TImode */
268 {4, 8, 16}, /* cost of storing SSE registers
269 in SImode, DImode and TImode */
270 3, /* MMX or SSE register to integer */
271 4, /* size of l1 cache. 486 has 8kB cache
272 shared for code and data, so 4kB is
273 not really precise. */
274 4, /* size of l2 cache */
275 0, /* size of prefetch block */
276 0, /* number of parallel prefetches */
278 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
279 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
280 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
281 COSTS_N_INSNS (3), /* cost of FABS instruction. */
282 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
283 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
284 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
285 DUMMY_STRINGOP_ALGS
},
286 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
287 DUMMY_STRINGOP_ALGS
},
288 1, /* scalar_stmt_cost. */
289 1, /* scalar load_cost. */
290 1, /* scalar_store_cost. */
291 1, /* vec_stmt_cost. */
292 1, /* vec_to_scalar_cost. */
293 1, /* scalar_to_vec_cost. */
294 1, /* vec_align_load_cost. */
295 2, /* vec_unalign_load_cost. */
296 1, /* vec_store_cost. */
297 3, /* cond_taken_branch_cost. */
298 1, /* cond_not_taken_branch_cost. */
302 struct processor_costs pentium_cost
= {
303 COSTS_N_INSNS (1), /* cost of an add instruction */
304 COSTS_N_INSNS (1), /* cost of a lea instruction */
305 COSTS_N_INSNS (4), /* variable shift costs */
306 COSTS_N_INSNS (1), /* constant shift costs */
307 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
308 COSTS_N_INSNS (11), /* HI */
309 COSTS_N_INSNS (11), /* SI */
310 COSTS_N_INSNS (11), /* DI */
311 COSTS_N_INSNS (11)}, /* other */
312 0, /* cost of multiply per each bit set */
313 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
314 COSTS_N_INSNS (25), /* HI */
315 COSTS_N_INSNS (25), /* SI */
316 COSTS_N_INSNS (25), /* DI */
317 COSTS_N_INSNS (25)}, /* other */
318 COSTS_N_INSNS (3), /* cost of movsx */
319 COSTS_N_INSNS (2), /* cost of movzx */
320 8, /* "large" insn */
322 6, /* cost for loading QImode using movzbl */
323 {2, 4, 2}, /* cost of loading integer registers
324 in QImode, HImode and SImode.
325 Relative to reg-reg move (2). */
326 {2, 4, 2}, /* cost of storing integer registers */
327 2, /* cost of reg,reg fld/fst */
328 {2, 2, 6}, /* cost of loading fp registers
329 in SFmode, DFmode and XFmode */
330 {4, 4, 6}, /* cost of storing fp registers
331 in SFmode, DFmode and XFmode */
332 8, /* cost of moving MMX register */
333 {8, 8}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {8, 8}, /* cost of storing MMX registers
336 in SImode and DImode */
337 2, /* cost of moving SSE register */
338 {4, 8, 16}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {4, 8, 16}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 3, /* MMX or SSE register to integer */
343 8, /* size of l1 cache. */
344 8, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (1), /* cost of FABS instruction. */
352 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
354 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
355 DUMMY_STRINGOP_ALGS
},
356 {{libcall
, {{-1, rep_prefix_4_byte
}}},
357 DUMMY_STRINGOP_ALGS
},
358 1, /* scalar_stmt_cost. */
359 1, /* scalar load_cost. */
360 1, /* scalar_store_cost. */
361 1, /* vec_stmt_cost. */
362 1, /* vec_to_scalar_cost. */
363 1, /* scalar_to_vec_cost. */
364 1, /* vec_align_load_cost. */
365 2, /* vec_unalign_load_cost. */
366 1, /* vec_store_cost. */
367 3, /* cond_taken_branch_cost. */
368 1, /* cond_not_taken_branch_cost. */
372 struct processor_costs pentiumpro_cost
= {
373 COSTS_N_INSNS (1), /* cost of an add instruction */
374 COSTS_N_INSNS (1), /* cost of a lea instruction */
375 COSTS_N_INSNS (1), /* variable shift costs */
376 COSTS_N_INSNS (1), /* constant shift costs */
377 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
378 COSTS_N_INSNS (4), /* HI */
379 COSTS_N_INSNS (4), /* SI */
380 COSTS_N_INSNS (4), /* DI */
381 COSTS_N_INSNS (4)}, /* other */
382 0, /* cost of multiply per each bit set */
383 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
384 COSTS_N_INSNS (17), /* HI */
385 COSTS_N_INSNS (17), /* SI */
386 COSTS_N_INSNS (17), /* DI */
387 COSTS_N_INSNS (17)}, /* other */
388 COSTS_N_INSNS (1), /* cost of movsx */
389 COSTS_N_INSNS (1), /* cost of movzx */
390 8, /* "large" insn */
392 2, /* cost for loading QImode using movzbl */
393 {4, 4, 4}, /* cost of loading integer registers
394 in QImode, HImode and SImode.
395 Relative to reg-reg move (2). */
396 {2, 2, 2}, /* cost of storing integer registers */
397 2, /* cost of reg,reg fld/fst */
398 {2, 2, 6}, /* cost of loading fp registers
399 in SFmode, DFmode and XFmode */
400 {4, 4, 6}, /* cost of storing fp registers
401 in SFmode, DFmode and XFmode */
402 2, /* cost of moving MMX register */
403 {2, 2}, /* cost of loading MMX registers
404 in SImode and DImode */
405 {2, 2}, /* cost of storing MMX registers
406 in SImode and DImode */
407 2, /* cost of moving SSE register */
408 {2, 2, 8}, /* cost of loading SSE registers
409 in SImode, DImode and TImode */
410 {2, 2, 8}, /* cost of storing SSE registers
411 in SImode, DImode and TImode */
412 3, /* MMX or SSE register to integer */
413 8, /* size of l1 cache. */
414 256, /* size of l2 cache */
415 32, /* size of prefetch block */
416 6, /* number of parallel prefetches */
418 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
419 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
420 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
421 COSTS_N_INSNS (2), /* cost of FABS instruction. */
422 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
423 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
424 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
425 (we ensure the alignment). For small blocks inline loop is still a
426 noticeable win, for bigger blocks either rep movsl or rep movsb is
427 way to go. Rep movsb has apparently more expensive startup time in CPU,
428 but after 4K the difference is down in the noise. */
429 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
430 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
431 DUMMY_STRINGOP_ALGS
},
432 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
433 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
434 DUMMY_STRINGOP_ALGS
},
435 1, /* scalar_stmt_cost. */
436 1, /* scalar load_cost. */
437 1, /* scalar_store_cost. */
438 1, /* vec_stmt_cost. */
439 1, /* vec_to_scalar_cost. */
440 1, /* scalar_to_vec_cost. */
441 1, /* vec_align_load_cost. */
442 2, /* vec_unalign_load_cost. */
443 1, /* vec_store_cost. */
444 3, /* cond_taken_branch_cost. */
445 1, /* cond_not_taken_branch_cost. */
449 struct processor_costs geode_cost
= {
450 COSTS_N_INSNS (1), /* cost of an add instruction */
451 COSTS_N_INSNS (1), /* cost of a lea instruction */
452 COSTS_N_INSNS (2), /* variable shift costs */
453 COSTS_N_INSNS (1), /* constant shift costs */
454 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
455 COSTS_N_INSNS (4), /* HI */
456 COSTS_N_INSNS (7), /* SI */
457 COSTS_N_INSNS (7), /* DI */
458 COSTS_N_INSNS (7)}, /* other */
459 0, /* cost of multiply per each bit set */
460 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
461 COSTS_N_INSNS (23), /* HI */
462 COSTS_N_INSNS (39), /* SI */
463 COSTS_N_INSNS (39), /* DI */
464 COSTS_N_INSNS (39)}, /* other */
465 COSTS_N_INSNS (1), /* cost of movsx */
466 COSTS_N_INSNS (1), /* cost of movzx */
467 8, /* "large" insn */
469 1, /* cost for loading QImode using movzbl */
470 {1, 1, 1}, /* cost of loading integer registers
471 in QImode, HImode and SImode.
472 Relative to reg-reg move (2). */
473 {1, 1, 1}, /* cost of storing integer registers */
474 1, /* cost of reg,reg fld/fst */
475 {1, 1, 1}, /* cost of loading fp registers
476 in SFmode, DFmode and XFmode */
477 {4, 6, 6}, /* cost of storing fp registers
478 in SFmode, DFmode and XFmode */
480 1, /* cost of moving MMX register */
481 {1, 1}, /* cost of loading MMX registers
482 in SImode and DImode */
483 {1, 1}, /* cost of storing MMX registers
484 in SImode and DImode */
485 1, /* cost of moving SSE register */
486 {1, 1, 1}, /* cost of loading SSE registers
487 in SImode, DImode and TImode */
488 {1, 1, 1}, /* cost of storing SSE registers
489 in SImode, DImode and TImode */
490 1, /* MMX or SSE register to integer */
491 64, /* size of l1 cache. */
492 128, /* size of l2 cache. */
493 32, /* size of prefetch block */
494 1, /* number of parallel prefetches */
496 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
497 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
498 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
501 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
502 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
503 DUMMY_STRINGOP_ALGS
},
504 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
505 DUMMY_STRINGOP_ALGS
},
506 1, /* scalar_stmt_cost. */
507 1, /* scalar load_cost. */
508 1, /* scalar_store_cost. */
509 1, /* vec_stmt_cost. */
510 1, /* vec_to_scalar_cost. */
511 1, /* scalar_to_vec_cost. */
512 1, /* vec_align_load_cost. */
513 2, /* vec_unalign_load_cost. */
514 1, /* vec_store_cost. */
515 3, /* cond_taken_branch_cost. */
516 1, /* cond_not_taken_branch_cost. */
520 struct processor_costs k6_cost
= {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (2), /* cost of a lea instruction */
523 COSTS_N_INSNS (1), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (3), /* HI */
527 COSTS_N_INSNS (3), /* SI */
528 COSTS_N_INSNS (3), /* DI */
529 COSTS_N_INSNS (3)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (18), /* HI */
533 COSTS_N_INSNS (18), /* SI */
534 COSTS_N_INSNS (18), /* DI */
535 COSTS_N_INSNS (18)}, /* other */
536 COSTS_N_INSNS (2), /* cost of movsx */
537 COSTS_N_INSNS (2), /* cost of movzx */
538 8, /* "large" insn */
540 3, /* cost for loading QImode using movzbl */
541 {4, 5, 4}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {2, 3, 2}, /* cost of storing integer registers */
545 4, /* cost of reg,reg fld/fst */
546 {6, 6, 6}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 4, 4}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
550 2, /* cost of moving MMX register */
551 {2, 2}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {2, 2}, /* cost of storing MMX registers
554 in SImode and DImode */
555 2, /* cost of moving SSE register */
556 {2, 2, 8}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {2, 2, 8}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 6, /* MMX or SSE register to integer */
561 32, /* size of l1 cache. */
562 32, /* size of l2 cache. Some models
563 have integrated l2 cache, but
564 optimizing for k6 is not important
565 enough to worry about that. */
566 32, /* size of prefetch block */
567 1, /* number of parallel prefetches */
569 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
570 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
571 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
572 COSTS_N_INSNS (2), /* cost of FABS instruction. */
573 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
574 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
575 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
576 DUMMY_STRINGOP_ALGS
},
577 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
578 DUMMY_STRINGOP_ALGS
},
579 1, /* scalar_stmt_cost. */
580 1, /* scalar load_cost. */
581 1, /* scalar_store_cost. */
582 1, /* vec_stmt_cost. */
583 1, /* vec_to_scalar_cost. */
584 1, /* scalar_to_vec_cost. */
585 1, /* vec_align_load_cost. */
586 2, /* vec_unalign_load_cost. */
587 1, /* vec_store_cost. */
588 3, /* cond_taken_branch_cost. */
589 1, /* cond_not_taken_branch_cost. */
593 struct processor_costs athlon_cost
= {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (5), /* HI */
600 COSTS_N_INSNS (5), /* SI */
601 COSTS_N_INSNS (5), /* DI */
602 COSTS_N_INSNS (5)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (26), /* HI */
606 COSTS_N_INSNS (42), /* SI */
607 COSTS_N_INSNS (74), /* DI */
608 COSTS_N_INSNS (74)}, /* other */
609 COSTS_N_INSNS (1), /* cost of movsx */
610 COSTS_N_INSNS (1), /* cost of movzx */
611 8, /* "large" insn */
613 4, /* cost for loading QImode using movzbl */
614 {3, 4, 3}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {3, 4, 3}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {4, 4, 12}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {6, 6, 8}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {4, 4}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {4, 4}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {4, 4, 6}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {4, 4, 5}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 5, /* MMX or SSE register to integer */
634 64, /* size of l1 cache. */
635 256, /* size of l2 cache. */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
639 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
649 DUMMY_STRINGOP_ALGS
},
650 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
651 DUMMY_STRINGOP_ALGS
},
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
666 struct processor_costs k8_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (2), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (4), /* HI */
673 COSTS_N_INSNS (3), /* SI */
674 COSTS_N_INSNS (4), /* DI */
675 COSTS_N_INSNS (5)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (26), /* HI */
679 COSTS_N_INSNS (42), /* SI */
680 COSTS_N_INSNS (74), /* DI */
681 COSTS_N_INSNS (74)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 8, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {3, 4, 3}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {3, 4, 3}, /* cost of storing integer registers */
691 4, /* cost of reg,reg fld/fst */
692 {4, 4, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {6, 6, 8}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 2, /* cost of moving MMX register */
697 {3, 3}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {4, 4}, /* cost of storing MMX registers
700 in SImode and DImode */
701 2, /* cost of moving SSE register */
702 {4, 3, 6}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {4, 4, 5}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 5, /* MMX or SSE register to integer */
707 64, /* size of l1 cache. */
708 512, /* size of l2 cache. */
709 64, /* size of prefetch block */
710 /* New AMD processors never drop prefetches; if they cannot be performed
711 immediately, they are queued. We set number of simultaneous prefetches
712 to a large constant to reflect this (it probably is not a good idea not
713 to limit number of prefetches at all, as their execution also takes some
715 100, /* number of parallel prefetches */
717 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
718 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
719 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
720 COSTS_N_INSNS (2), /* cost of FABS instruction. */
721 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
722 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
723 /* K8 has optimized REP instruction for medium sized blocks, but for very
724 small blocks it is better to use loop. For large blocks, libcall can
725 do nontemporary accesses and beat inline considerably. */
726 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
727 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
728 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
729 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
730 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
731 4, /* scalar_stmt_cost. */
732 2, /* scalar load_cost. */
733 2, /* scalar_store_cost. */
734 5, /* vec_stmt_cost. */
735 0, /* vec_to_scalar_cost. */
736 2, /* scalar_to_vec_cost. */
737 2, /* vec_align_load_cost. */
738 3, /* vec_unalign_load_cost. */
739 3, /* vec_store_cost. */
740 3, /* cond_taken_branch_cost. */
741 2, /* cond_not_taken_branch_cost. */
744 struct processor_costs amdfam10_cost
= {
745 COSTS_N_INSNS (1), /* cost of an add instruction */
746 COSTS_N_INSNS (2), /* cost of a lea instruction */
747 COSTS_N_INSNS (1), /* variable shift costs */
748 COSTS_N_INSNS (1), /* constant shift costs */
749 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
750 COSTS_N_INSNS (4), /* HI */
751 COSTS_N_INSNS (3), /* SI */
752 COSTS_N_INSNS (4), /* DI */
753 COSTS_N_INSNS (5)}, /* other */
754 0, /* cost of multiply per each bit set */
755 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
756 COSTS_N_INSNS (35), /* HI */
757 COSTS_N_INSNS (51), /* SI */
758 COSTS_N_INSNS (83), /* DI */
759 COSTS_N_INSNS (83)}, /* other */
760 COSTS_N_INSNS (1), /* cost of movsx */
761 COSTS_N_INSNS (1), /* cost of movzx */
762 8, /* "large" insn */
764 4, /* cost for loading QImode using movzbl */
765 {3, 4, 3}, /* cost of loading integer registers
766 in QImode, HImode and SImode.
767 Relative to reg-reg move (2). */
768 {3, 4, 3}, /* cost of storing integer registers */
769 4, /* cost of reg,reg fld/fst */
770 {4, 4, 12}, /* cost of loading fp registers
771 in SFmode, DFmode and XFmode */
772 {6, 6, 8}, /* cost of storing fp registers
773 in SFmode, DFmode and XFmode */
774 2, /* cost of moving MMX register */
775 {3, 3}, /* cost of loading MMX registers
776 in SImode and DImode */
777 {4, 4}, /* cost of storing MMX registers
778 in SImode and DImode */
779 2, /* cost of moving SSE register */
780 {4, 4, 3}, /* cost of loading SSE registers
781 in SImode, DImode and TImode */
782 {4, 4, 5}, /* cost of storing SSE registers
783 in SImode, DImode and TImode */
784 3, /* MMX or SSE register to integer */
786 MOVD reg64, xmmreg Double FSTORE 4
787 MOVD reg32, xmmreg Double FSTORE 4
789 MOVD reg64, xmmreg Double FADD 3
791 MOVD reg32, xmmreg Double FADD 3
793 64, /* size of l1 cache. */
794 512, /* size of l2 cache. */
795 64, /* size of prefetch block */
796 /* New AMD processors never drop prefetches; if they cannot be performed
797 immediately, they are queued. We set number of simultaneous prefetches
798 to a large constant to reflect this (it probably is not a good idea not
799 to limit number of prefetches at all, as their execution also takes some
801 100, /* number of parallel prefetches */
803 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
804 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
805 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
806 COSTS_N_INSNS (2), /* cost of FABS instruction. */
807 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
808 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
810 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
811 very small blocks it is better to use loop. For large blocks, libcall can
812 do nontemporary accesses and beat inline considerably. */
813 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
814 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
815 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
816 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
817 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
818 4, /* scalar_stmt_cost. */
819 2, /* scalar load_cost. */
820 2, /* scalar_store_cost. */
821 6, /* vec_stmt_cost. */
822 0, /* vec_to_scalar_cost. */
823 2, /* scalar_to_vec_cost. */
824 2, /* vec_align_load_cost. */
825 2, /* vec_unalign_load_cost. */
826 2, /* vec_store_cost. */
827 2, /* cond_taken_branch_cost. */
828 1, /* cond_not_taken_branch_cost. */
831 struct processor_costs bdver1_cost
= {
832 COSTS_N_INSNS (1), /* cost of an add instruction */
833 COSTS_N_INSNS (1), /* cost of a lea instruction */
834 COSTS_N_INSNS (1), /* variable shift costs */
835 COSTS_N_INSNS (1), /* constant shift costs */
836 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
837 COSTS_N_INSNS (4), /* HI */
838 COSTS_N_INSNS (4), /* SI */
839 COSTS_N_INSNS (6), /* DI */
840 COSTS_N_INSNS (6)}, /* other */
841 0, /* cost of multiply per each bit set */
842 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
843 COSTS_N_INSNS (35), /* HI */
844 COSTS_N_INSNS (51), /* SI */
845 COSTS_N_INSNS (83), /* DI */
846 COSTS_N_INSNS (83)}, /* other */
847 COSTS_N_INSNS (1), /* cost of movsx */
848 COSTS_N_INSNS (1), /* cost of movzx */
849 8, /* "large" insn */
851 4, /* cost for loading QImode using movzbl */
852 {5, 5, 4}, /* cost of loading integer registers
853 in QImode, HImode and SImode.
854 Relative to reg-reg move (2). */
855 {4, 4, 4}, /* cost of storing integer registers */
856 2, /* cost of reg,reg fld/fst */
857 {5, 5, 12}, /* cost of loading fp registers
858 in SFmode, DFmode and XFmode */
859 {4, 4, 8}, /* cost of storing fp registers
860 in SFmode, DFmode and XFmode */
861 2, /* cost of moving MMX register */
862 {4, 4}, /* cost of loading MMX registers
863 in SImode and DImode */
864 {4, 4}, /* cost of storing MMX registers
865 in SImode and DImode */
866 2, /* cost of moving SSE register */
867 {4, 4, 4}, /* cost of loading SSE registers
868 in SImode, DImode and TImode */
869 {4, 4, 4}, /* cost of storing SSE registers
870 in SImode, DImode and TImode */
871 2, /* MMX or SSE register to integer */
873 MOVD reg64, xmmreg Double FSTORE 4
874 MOVD reg32, xmmreg Double FSTORE 4
876 MOVD reg64, xmmreg Double FADD 3
878 MOVD reg32, xmmreg Double FADD 3
880 16, /* size of l1 cache. */
881 2048, /* size of l2 cache. */
882 64, /* size of prefetch block */
883 /* New AMD processors never drop prefetches; if they cannot be performed
884 immediately, they are queued. We set number of simultaneous prefetches
885 to a large constant to reflect this (it probably is not a good idea not
886 to limit number of prefetches at all, as their execution also takes some
888 100, /* number of parallel prefetches */
890 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
891 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
892 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
893 COSTS_N_INSNS (2), /* cost of FABS instruction. */
894 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
895 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
897 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
898 very small blocks it is better to use loop. For large blocks, libcall
899 can do nontemporary accesses and beat inline considerably. */
900 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
901 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
902 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
903 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
904 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
905 6, /* scalar_stmt_cost. */
906 4, /* scalar load_cost. */
907 4, /* scalar_store_cost. */
908 6, /* vec_stmt_cost. */
909 0, /* vec_to_scalar_cost. */
910 2, /* scalar_to_vec_cost. */
911 4, /* vec_align_load_cost. */
912 4, /* vec_unalign_load_cost. */
913 4, /* vec_store_cost. */
914 2, /* cond_taken_branch_cost. */
915 1, /* cond_not_taken_branch_cost. */
918 struct processor_costs bdver2_cost
= {
919 COSTS_N_INSNS (1), /* cost of an add instruction */
920 COSTS_N_INSNS (1), /* cost of a lea instruction */
921 COSTS_N_INSNS (1), /* variable shift costs */
922 COSTS_N_INSNS (1), /* constant shift costs */
923 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
924 COSTS_N_INSNS (4), /* HI */
925 COSTS_N_INSNS (4), /* SI */
926 COSTS_N_INSNS (6), /* DI */
927 COSTS_N_INSNS (6)}, /* other */
928 0, /* cost of multiply per each bit set */
929 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
930 COSTS_N_INSNS (35), /* HI */
931 COSTS_N_INSNS (51), /* SI */
932 COSTS_N_INSNS (83), /* DI */
933 COSTS_N_INSNS (83)}, /* other */
934 COSTS_N_INSNS (1), /* cost of movsx */
935 COSTS_N_INSNS (1), /* cost of movzx */
936 8, /* "large" insn */
938 4, /* cost for loading QImode using movzbl */
939 {5, 5, 4}, /* cost of loading integer registers
940 in QImode, HImode and SImode.
941 Relative to reg-reg move (2). */
942 {4, 4, 4}, /* cost of storing integer registers */
943 2, /* cost of reg,reg fld/fst */
944 {5, 5, 12}, /* cost of loading fp registers
945 in SFmode, DFmode and XFmode */
946 {4, 4, 8}, /* cost of storing fp registers
947 in SFmode, DFmode and XFmode */
948 2, /* cost of moving MMX register */
949 {4, 4}, /* cost of loading MMX registers
950 in SImode and DImode */
951 {4, 4}, /* cost of storing MMX registers
952 in SImode and DImode */
953 2, /* cost of moving SSE register */
954 {4, 4, 4}, /* cost of loading SSE registers
955 in SImode, DImode and TImode */
956 {4, 4, 4}, /* cost of storing SSE registers
957 in SImode, DImode and TImode */
958 2, /* MMX or SSE register to integer */
960 MOVD reg64, xmmreg Double FSTORE 4
961 MOVD reg32, xmmreg Double FSTORE 4
963 MOVD reg64, xmmreg Double FADD 3
965 MOVD reg32, xmmreg Double FADD 3
967 16, /* size of l1 cache. */
968 2048, /* size of l2 cache. */
969 64, /* size of prefetch block */
970 /* New AMD processors never drop prefetches; if they cannot be performed
971 immediately, they are queued. We set number of simultaneous prefetches
972 to a large constant to reflect this (it probably is not a good idea not
973 to limit number of prefetches at all, as their execution also takes some
975 100, /* number of parallel prefetches */
977 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
978 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
979 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
980 COSTS_N_INSNS (2), /* cost of FABS instruction. */
981 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
982 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
984 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
985 very small blocks it is better to use loop. For large blocks, libcall
986 can do nontemporary accesses and beat inline considerably. */
987 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
988 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
989 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
990 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
991 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
992 6, /* scalar_stmt_cost. */
993 4, /* scalar load_cost. */
994 4, /* scalar_store_cost. */
995 6, /* vec_stmt_cost. */
996 0, /* vec_to_scalar_cost. */
997 2, /* scalar_to_vec_cost. */
998 4, /* vec_align_load_cost. */
999 4, /* vec_unalign_load_cost. */
1000 4, /* vec_store_cost. */
1001 2, /* cond_taken_branch_cost. */
1002 1, /* cond_not_taken_branch_cost. */
1005 struct processor_costs bdver3_cost
= {
1006 COSTS_N_INSNS (1), /* cost of an add instruction */
1007 COSTS_N_INSNS (1), /* cost of a lea instruction */
1008 COSTS_N_INSNS (1), /* variable shift costs */
1009 COSTS_N_INSNS (1), /* constant shift costs */
1010 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1011 COSTS_N_INSNS (4), /* HI */
1012 COSTS_N_INSNS (4), /* SI */
1013 COSTS_N_INSNS (6), /* DI */
1014 COSTS_N_INSNS (6)}, /* other */
1015 0, /* cost of multiply per each bit set */
1016 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1017 COSTS_N_INSNS (35), /* HI */
1018 COSTS_N_INSNS (51), /* SI */
1019 COSTS_N_INSNS (83), /* DI */
1020 COSTS_N_INSNS (83)}, /* other */
1021 COSTS_N_INSNS (1), /* cost of movsx */
1022 COSTS_N_INSNS (1), /* cost of movzx */
1023 8, /* "large" insn */
1025 4, /* cost for loading QImode using movzbl */
1026 {5, 5, 4}, /* cost of loading integer registers
1027 in QImode, HImode and SImode.
1028 Relative to reg-reg move (2). */
1029 {4, 4, 4}, /* cost of storing integer registers */
1030 2, /* cost of reg,reg fld/fst */
1031 {5, 5, 12}, /* cost of loading fp registers
1032 in SFmode, DFmode and XFmode */
1033 {4, 4, 8}, /* cost of storing fp registers
1034 in SFmode, DFmode and XFmode */
1035 2, /* cost of moving MMX register */
1036 {4, 4}, /* cost of loading MMX registers
1037 in SImode and DImode */
1038 {4, 4}, /* cost of storing MMX registers
1039 in SImode and DImode */
1040 2, /* cost of moving SSE register */
1041 {4, 4, 4}, /* cost of loading SSE registers
1042 in SImode, DImode and TImode */
1043 {4, 4, 4}, /* cost of storing SSE registers
1044 in SImode, DImode and TImode */
1045 2, /* MMX or SSE register to integer */
1046 16, /* size of l1 cache. */
1047 2048, /* size of l2 cache. */
1048 64, /* size of prefetch block */
1049 /* New AMD processors never drop prefetches; if they cannot be performed
1050 immediately, they are queued. We set number of simultaneous prefetches
1051 to a large constant to reflect this (it probably is not a good idea not
1052 to limit number of prefetches at all, as their execution also takes some
1054 100, /* number of parallel prefetches */
1055 2, /* Branch cost */
1056 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1057 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1058 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1059 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1060 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1061 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1063 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1064 very small blocks it is better to use loop. For large blocks, libcall
1065 can do nontemporary accesses and beat inline considerably. */
1066 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1067 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1068 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1069 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1070 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1071 6, /* scalar_stmt_cost. */
1072 4, /* scalar load_cost. */
1073 4, /* scalar_store_cost. */
1074 6, /* vec_stmt_cost. */
1075 0, /* vec_to_scalar_cost. */
1076 2, /* scalar_to_vec_cost. */
1077 4, /* vec_align_load_cost. */
1078 4, /* vec_unalign_load_cost. */
1079 4, /* vec_store_cost. */
1080 2, /* cond_taken_branch_cost. */
1081 1, /* cond_not_taken_branch_cost. */
1084 struct processor_costs btver1_cost
= {
1085 COSTS_N_INSNS (1), /* cost of an add instruction */
1086 COSTS_N_INSNS (2), /* cost of a lea instruction */
1087 COSTS_N_INSNS (1), /* variable shift costs */
1088 COSTS_N_INSNS (1), /* constant shift costs */
1089 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1090 COSTS_N_INSNS (4), /* HI */
1091 COSTS_N_INSNS (3), /* SI */
1092 COSTS_N_INSNS (4), /* DI */
1093 COSTS_N_INSNS (5)}, /* other */
1094 0, /* cost of multiply per each bit set */
1095 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1096 COSTS_N_INSNS (35), /* HI */
1097 COSTS_N_INSNS (51), /* SI */
1098 COSTS_N_INSNS (83), /* DI */
1099 COSTS_N_INSNS (83)}, /* other */
1100 COSTS_N_INSNS (1), /* cost of movsx */
1101 COSTS_N_INSNS (1), /* cost of movzx */
1102 8, /* "large" insn */
1104 4, /* cost for loading QImode using movzbl */
1105 {3, 4, 3}, /* cost of loading integer registers
1106 in QImode, HImode and SImode.
1107 Relative to reg-reg move (2). */
1108 {3, 4, 3}, /* cost of storing integer registers */
1109 4, /* cost of reg,reg fld/fst */
1110 {4, 4, 12}, /* cost of loading fp registers
1111 in SFmode, DFmode and XFmode */
1112 {6, 6, 8}, /* cost of storing fp registers
1113 in SFmode, DFmode and XFmode */
1114 2, /* cost of moving MMX register */
1115 {3, 3}, /* cost of loading MMX registers
1116 in SImode and DImode */
1117 {4, 4}, /* cost of storing MMX registers
1118 in SImode and DImode */
1119 2, /* cost of moving SSE register */
1120 {4, 4, 3}, /* cost of loading SSE registers
1121 in SImode, DImode and TImode */
1122 {4, 4, 5}, /* cost of storing SSE registers
1123 in SImode, DImode and TImode */
1124 3, /* MMX or SSE register to integer */
1126 MOVD reg64, xmmreg Double FSTORE 4
1127 MOVD reg32, xmmreg Double FSTORE 4
1129 MOVD reg64, xmmreg Double FADD 3
1131 MOVD reg32, xmmreg Double FADD 3
1133 32, /* size of l1 cache. */
1134 512, /* size of l2 cache. */
1135 64, /* size of prefetch block */
1136 100, /* number of parallel prefetches */
1137 2, /* Branch cost */
1138 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1139 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1140 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1141 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1142 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1143 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1145 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1146 very small blocks it is better to use loop. For large blocks, libcall can
1147 do nontemporary accesses and beat inline considerably. */
1148 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1149 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1150 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1151 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1152 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1153 4, /* scalar_stmt_cost. */
1154 2, /* scalar load_cost. */
1155 2, /* scalar_store_cost. */
1156 6, /* vec_stmt_cost. */
1157 0, /* vec_to_scalar_cost. */
1158 2, /* scalar_to_vec_cost. */
1159 2, /* vec_align_load_cost. */
1160 2, /* vec_unalign_load_cost. */
1161 2, /* vec_store_cost. */
1162 2, /* cond_taken_branch_cost. */
1163 1, /* cond_not_taken_branch_cost. */
1166 struct processor_costs btver2_cost
= {
1167 COSTS_N_INSNS (1), /* cost of an add instruction */
1168 COSTS_N_INSNS (2), /* cost of a lea instruction */
1169 COSTS_N_INSNS (1), /* variable shift costs */
1170 COSTS_N_INSNS (1), /* constant shift costs */
1171 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1172 COSTS_N_INSNS (4), /* HI */
1173 COSTS_N_INSNS (3), /* SI */
1174 COSTS_N_INSNS (4), /* DI */
1175 COSTS_N_INSNS (5)}, /* other */
1176 0, /* cost of multiply per each bit set */
1177 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1178 COSTS_N_INSNS (35), /* HI */
1179 COSTS_N_INSNS (51), /* SI */
1180 COSTS_N_INSNS (83), /* DI */
1181 COSTS_N_INSNS (83)}, /* other */
1182 COSTS_N_INSNS (1), /* cost of movsx */
1183 COSTS_N_INSNS (1), /* cost of movzx */
1184 8, /* "large" insn */
1186 4, /* cost for loading QImode using movzbl */
1187 {3, 4, 3}, /* cost of loading integer registers
1188 in QImode, HImode and SImode.
1189 Relative to reg-reg move (2). */
1190 {3, 4, 3}, /* cost of storing integer registers */
1191 4, /* cost of reg,reg fld/fst */
1192 {4, 4, 12}, /* cost of loading fp registers
1193 in SFmode, DFmode and XFmode */
1194 {6, 6, 8}, /* cost of storing fp registers
1195 in SFmode, DFmode and XFmode */
1196 2, /* cost of moving MMX register */
1197 {3, 3}, /* cost of loading MMX registers
1198 in SImode and DImode */
1199 {4, 4}, /* cost of storing MMX registers
1200 in SImode and DImode */
1201 2, /* cost of moving SSE register */
1202 {4, 4, 3}, /* cost of loading SSE registers
1203 in SImode, DImode and TImode */
1204 {4, 4, 5}, /* cost of storing SSE registers
1205 in SImode, DImode and TImode */
1206 3, /* MMX or SSE register to integer */
1208 MOVD reg64, xmmreg Double FSTORE 4
1209 MOVD reg32, xmmreg Double FSTORE 4
1211 MOVD reg64, xmmreg Double FADD 3
1213 MOVD reg32, xmmreg Double FADD 3
1215 32, /* size of l1 cache. */
1216 2048, /* size of l2 cache. */
1217 64, /* size of prefetch block */
1218 100, /* number of parallel prefetches */
1219 2, /* Branch cost */
1220 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1221 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1222 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1223 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1224 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1225 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1227 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1228 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1229 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1230 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1231 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1232 4, /* scalar_stmt_cost. */
1233 2, /* scalar load_cost. */
1234 2, /* scalar_store_cost. */
1235 6, /* vec_stmt_cost. */
1236 0, /* vec_to_scalar_cost. */
1237 2, /* scalar_to_vec_cost. */
1238 2, /* vec_align_load_cost. */
1239 2, /* vec_unalign_load_cost. */
1240 2, /* vec_store_cost. */
1241 2, /* cond_taken_branch_cost. */
1242 1, /* cond_not_taken_branch_cost. */
1246 struct processor_costs pentium4_cost
= {
1247 COSTS_N_INSNS (1), /* cost of an add instruction */
1248 COSTS_N_INSNS (3), /* cost of a lea instruction */
1249 COSTS_N_INSNS (4), /* variable shift costs */
1250 COSTS_N_INSNS (4), /* constant shift costs */
1251 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1252 COSTS_N_INSNS (15), /* HI */
1253 COSTS_N_INSNS (15), /* SI */
1254 COSTS_N_INSNS (15), /* DI */
1255 COSTS_N_INSNS (15)}, /* other */
1256 0, /* cost of multiply per each bit set */
1257 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1258 COSTS_N_INSNS (56), /* HI */
1259 COSTS_N_INSNS (56), /* SI */
1260 COSTS_N_INSNS (56), /* DI */
1261 COSTS_N_INSNS (56)}, /* other */
1262 COSTS_N_INSNS (1), /* cost of movsx */
1263 COSTS_N_INSNS (1), /* cost of movzx */
1264 16, /* "large" insn */
1266 2, /* cost for loading QImode using movzbl */
1267 {4, 5, 4}, /* cost of loading integer registers
1268 in QImode, HImode and SImode.
1269 Relative to reg-reg move (2). */
1270 {2, 3, 2}, /* cost of storing integer registers */
1271 2, /* cost of reg,reg fld/fst */
1272 {2, 2, 6}, /* cost of loading fp registers
1273 in SFmode, DFmode and XFmode */
1274 {4, 4, 6}, /* cost of storing fp registers
1275 in SFmode, DFmode and XFmode */
1276 2, /* cost of moving MMX register */
1277 {2, 2}, /* cost of loading MMX registers
1278 in SImode and DImode */
1279 {2, 2}, /* cost of storing MMX registers
1280 in SImode and DImode */
1281 12, /* cost of moving SSE register */
1282 {12, 12, 12}, /* cost of loading SSE registers
1283 in SImode, DImode and TImode */
1284 {2, 2, 8}, /* cost of storing SSE registers
1285 in SImode, DImode and TImode */
1286 10, /* MMX or SSE register to integer */
1287 8, /* size of l1 cache. */
1288 256, /* size of l2 cache. */
1289 64, /* size of prefetch block */
1290 6, /* number of parallel prefetches */
1291 2, /* Branch cost */
1292 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1293 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1294 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1295 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1296 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1297 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1298 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1299 DUMMY_STRINGOP_ALGS
},
1300 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1302 DUMMY_STRINGOP_ALGS
},
1303 1, /* scalar_stmt_cost. */
1304 1, /* scalar load_cost. */
1305 1, /* scalar_store_cost. */
1306 1, /* vec_stmt_cost. */
1307 1, /* vec_to_scalar_cost. */
1308 1, /* scalar_to_vec_cost. */
1309 1, /* vec_align_load_cost. */
1310 2, /* vec_unalign_load_cost. */
1311 1, /* vec_store_cost. */
1312 3, /* cond_taken_branch_cost. */
1313 1, /* cond_not_taken_branch_cost. */
1317 struct processor_costs nocona_cost
= {
1318 COSTS_N_INSNS (1), /* cost of an add instruction */
1319 COSTS_N_INSNS (1), /* cost of a lea instruction */
1320 COSTS_N_INSNS (1), /* variable shift costs */
1321 COSTS_N_INSNS (1), /* constant shift costs */
1322 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1323 COSTS_N_INSNS (10), /* HI */
1324 COSTS_N_INSNS (10), /* SI */
1325 COSTS_N_INSNS (10), /* DI */
1326 COSTS_N_INSNS (10)}, /* other */
1327 0, /* cost of multiply per each bit set */
1328 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1329 COSTS_N_INSNS (66), /* HI */
1330 COSTS_N_INSNS (66), /* SI */
1331 COSTS_N_INSNS (66), /* DI */
1332 COSTS_N_INSNS (66)}, /* other */
1333 COSTS_N_INSNS (1), /* cost of movsx */
1334 COSTS_N_INSNS (1), /* cost of movzx */
1335 16, /* "large" insn */
1336 17, /* MOVE_RATIO */
1337 4, /* cost for loading QImode using movzbl */
1338 {4, 4, 4}, /* cost of loading integer registers
1339 in QImode, HImode and SImode.
1340 Relative to reg-reg move (2). */
1341 {4, 4, 4}, /* cost of storing integer registers */
1342 3, /* cost of reg,reg fld/fst */
1343 {12, 12, 12}, /* cost of loading fp registers
1344 in SFmode, DFmode and XFmode */
1345 {4, 4, 4}, /* cost of storing fp registers
1346 in SFmode, DFmode and XFmode */
1347 6, /* cost of moving MMX register */
1348 {12, 12}, /* cost of loading MMX registers
1349 in SImode and DImode */
1350 {12, 12}, /* cost of storing MMX registers
1351 in SImode and DImode */
1352 6, /* cost of moving SSE register */
1353 {12, 12, 12}, /* cost of loading SSE registers
1354 in SImode, DImode and TImode */
1355 {12, 12, 12}, /* cost of storing SSE registers
1356 in SImode, DImode and TImode */
1357 8, /* MMX or SSE register to integer */
1358 8, /* size of l1 cache. */
1359 1024, /* size of l2 cache. */
1360 128, /* size of prefetch block */
1361 8, /* number of parallel prefetches */
1362 1, /* Branch cost */
1363 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1364 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1365 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1366 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1367 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1368 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1369 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1370 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1371 {100000, unrolled_loop
}, {-1, libcall
}}}},
1372 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1374 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1375 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1376 1, /* scalar_stmt_cost. */
1377 1, /* scalar load_cost. */
1378 1, /* scalar_store_cost. */
1379 1, /* vec_stmt_cost. */
1380 1, /* vec_to_scalar_cost. */
1381 1, /* scalar_to_vec_cost. */
1382 1, /* vec_align_load_cost. */
1383 2, /* vec_unalign_load_cost. */
1384 1, /* vec_store_cost. */
1385 3, /* cond_taken_branch_cost. */
1386 1, /* cond_not_taken_branch_cost. */
1390 struct processor_costs atom_cost
= {
1391 COSTS_N_INSNS (1), /* cost of an add instruction */
1392 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1393 COSTS_N_INSNS (1), /* variable shift costs */
1394 COSTS_N_INSNS (1), /* constant shift costs */
1395 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1396 COSTS_N_INSNS (4), /* HI */
1397 COSTS_N_INSNS (3), /* SI */
1398 COSTS_N_INSNS (4), /* DI */
1399 COSTS_N_INSNS (2)}, /* other */
1400 0, /* cost of multiply per each bit set */
1401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1402 COSTS_N_INSNS (26), /* HI */
1403 COSTS_N_INSNS (42), /* SI */
1404 COSTS_N_INSNS (74), /* DI */
1405 COSTS_N_INSNS (74)}, /* other */
1406 COSTS_N_INSNS (1), /* cost of movsx */
1407 COSTS_N_INSNS (1), /* cost of movzx */
1408 8, /* "large" insn */
1409 17, /* MOVE_RATIO */
1410 4, /* cost for loading QImode using movzbl */
1411 {4, 4, 4}, /* cost of loading integer registers
1412 in QImode, HImode and SImode.
1413 Relative to reg-reg move (2). */
1414 {4, 4, 4}, /* cost of storing integer registers */
1415 4, /* cost of reg,reg fld/fst */
1416 {12, 12, 12}, /* cost of loading fp registers
1417 in SFmode, DFmode and XFmode */
1418 {6, 6, 8}, /* cost of storing fp registers
1419 in SFmode, DFmode and XFmode */
1420 2, /* cost of moving MMX register */
1421 {8, 8}, /* cost of loading MMX registers
1422 in SImode and DImode */
1423 {8, 8}, /* cost of storing MMX registers
1424 in SImode and DImode */
1425 2, /* cost of moving SSE register */
1426 {8, 8, 8}, /* cost of loading SSE registers
1427 in SImode, DImode and TImode */
1428 {8, 8, 8}, /* cost of storing SSE registers
1429 in SImode, DImode and TImode */
1430 5, /* MMX or SSE register to integer */
1431 32, /* size of l1 cache. */
1432 256, /* size of l2 cache. */
1433 64, /* size of prefetch block */
1434 6, /* number of parallel prefetches */
1435 3, /* Branch cost */
1436 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1437 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1438 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1439 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1440 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1441 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1442 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1443 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1444 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1445 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1446 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1447 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1448 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1449 1, /* scalar_stmt_cost. */
1450 1, /* scalar load_cost. */
1451 1, /* scalar_store_cost. */
1452 1, /* vec_stmt_cost. */
1453 1, /* vec_to_scalar_cost. */
1454 1, /* scalar_to_vec_cost. */
1455 1, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 1, /* vec_store_cost. */
1458 3, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 /* Generic64 should produce code tuned for Nocona and K8. */
1464 struct processor_costs generic64_cost
= {
1465 COSTS_N_INSNS (1), /* cost of an add instruction */
1466 /* On all chips taken into consideration lea is 2 cycles and more. With
1467 this cost however our current implementation of synth_mult results in
1468 use of unnecessary temporary registers causing regression on several
1469 SPECfp benchmarks. */
1470 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1471 COSTS_N_INSNS (1), /* variable shift costs */
1472 COSTS_N_INSNS (1), /* constant shift costs */
1473 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1474 COSTS_N_INSNS (4), /* HI */
1475 COSTS_N_INSNS (3), /* SI */
1476 COSTS_N_INSNS (4), /* DI */
1477 COSTS_N_INSNS (2)}, /* other */
1478 0, /* cost of multiply per each bit set */
1479 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1480 COSTS_N_INSNS (26), /* HI */
1481 COSTS_N_INSNS (42), /* SI */
1482 COSTS_N_INSNS (74), /* DI */
1483 COSTS_N_INSNS (74)}, /* other */
1484 COSTS_N_INSNS (1), /* cost of movsx */
1485 COSTS_N_INSNS (1), /* cost of movzx */
1486 8, /* "large" insn */
1487 17, /* MOVE_RATIO */
1488 4, /* cost for loading QImode using movzbl */
1489 {4, 4, 4}, /* cost of loading integer registers
1490 in QImode, HImode and SImode.
1491 Relative to reg-reg move (2). */
1492 {4, 4, 4}, /* cost of storing integer registers */
1493 4, /* cost of reg,reg fld/fst */
1494 {12, 12, 12}, /* cost of loading fp registers
1495 in SFmode, DFmode and XFmode */
1496 {6, 6, 8}, /* cost of storing fp registers
1497 in SFmode, DFmode and XFmode */
1498 2, /* cost of moving MMX register */
1499 {8, 8}, /* cost of loading MMX registers
1500 in SImode and DImode */
1501 {8, 8}, /* cost of storing MMX registers
1502 in SImode and DImode */
1503 2, /* cost of moving SSE register */
1504 {8, 8, 8}, /* cost of loading SSE registers
1505 in SImode, DImode and TImode */
1506 {8, 8, 8}, /* cost of storing SSE registers
1507 in SImode, DImode and TImode */
1508 5, /* MMX or SSE register to integer */
1509 32, /* size of l1 cache. */
1510 512, /* size of l2 cache. */
1511 64, /* size of prefetch block */
1512 6, /* number of parallel prefetches */
1513 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1514 value is increased to perhaps more appropriate value of 5. */
1515 3, /* Branch cost */
1516 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1517 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1518 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1519 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1520 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1521 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1522 {DUMMY_STRINGOP_ALGS
,
1523 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1524 {DUMMY_STRINGOP_ALGS
,
1525 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1526 1, /* scalar_stmt_cost. */
1527 1, /* scalar load_cost. */
1528 1, /* scalar_store_cost. */
1529 1, /* vec_stmt_cost. */
1530 1, /* vec_to_scalar_cost. */
1531 1, /* scalar_to_vec_cost. */
1532 1, /* vec_align_load_cost. */
1533 2, /* vec_unalign_load_cost. */
1534 1, /* vec_store_cost. */
1535 3, /* cond_taken_branch_cost. */
1536 1, /* cond_not_taken_branch_cost. */
1539 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1542 struct processor_costs generic32_cost
= {
1543 COSTS_N_INSNS (1), /* cost of an add instruction */
1544 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1545 COSTS_N_INSNS (1), /* variable shift costs */
1546 COSTS_N_INSNS (1), /* constant shift costs */
1547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1548 COSTS_N_INSNS (4), /* HI */
1549 COSTS_N_INSNS (3), /* SI */
1550 COSTS_N_INSNS (4), /* DI */
1551 COSTS_N_INSNS (2)}, /* other */
1552 0, /* cost of multiply per each bit set */
1553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1554 COSTS_N_INSNS (26), /* HI */
1555 COSTS_N_INSNS (42), /* SI */
1556 COSTS_N_INSNS (74), /* DI */
1557 COSTS_N_INSNS (74)}, /* other */
1558 COSTS_N_INSNS (1), /* cost of movsx */
1559 COSTS_N_INSNS (1), /* cost of movzx */
1560 8, /* "large" insn */
1561 17, /* MOVE_RATIO */
1562 4, /* cost for loading QImode using movzbl */
1563 {4, 4, 4}, /* cost of loading integer registers
1564 in QImode, HImode and SImode.
1565 Relative to reg-reg move (2). */
1566 {4, 4, 4}, /* cost of storing integer registers */
1567 4, /* cost of reg,reg fld/fst */
1568 {12, 12, 12}, /* cost of loading fp registers
1569 in SFmode, DFmode and XFmode */
1570 {6, 6, 8}, /* cost of storing fp registers
1571 in SFmode, DFmode and XFmode */
1572 2, /* cost of moving MMX register */
1573 {8, 8}, /* cost of loading MMX registers
1574 in SImode and DImode */
1575 {8, 8}, /* cost of storing MMX registers
1576 in SImode and DImode */
1577 2, /* cost of moving SSE register */
1578 {8, 8, 8}, /* cost of loading SSE registers
1579 in SImode, DImode and TImode */
1580 {8, 8, 8}, /* cost of storing SSE registers
1581 in SImode, DImode and TImode */
1582 5, /* MMX or SSE register to integer */
1583 32, /* size of l1 cache. */
1584 256, /* size of l2 cache. */
1585 64, /* size of prefetch block */
1586 6, /* number of parallel prefetches */
1587 3, /* Branch cost */
1588 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1589 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1590 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1591 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1592 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1593 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1594 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1595 DUMMY_STRINGOP_ALGS
},
1596 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1597 DUMMY_STRINGOP_ALGS
},
1598 1, /* scalar_stmt_cost. */
1599 1, /* scalar load_cost. */
1600 1, /* scalar_store_cost. */
1601 1, /* vec_stmt_cost. */
1602 1, /* vec_to_scalar_cost. */
1603 1, /* scalar_to_vec_cost. */
1604 1, /* vec_align_load_cost. */
1605 2, /* vec_unalign_load_cost. */
1606 1, /* vec_store_cost. */
1607 3, /* cond_taken_branch_cost. */
1608 1, /* cond_not_taken_branch_cost. */
1611 /* Set by -mtune. */
1612 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1614 /* Set by -mtune or -Os. */
1615 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1658 /* Feature tests against the various tunings. */
1659 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1661 /* Feature tests against the various tunings used to create ix86_tune_features
1662 based on the processor mask. */
1663 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1664 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1665 negatively, so enabling for Generic64 seems like good code size
1666 tradeoff. We can't enable it for 32bit generic because it does not
1667 work well with PPro base chips. */
1668 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1670 /* X86_TUNE_PUSH_MEMORY */
1671 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1673 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1676 /* X86_TUNE_UNROLL_STRLEN */
1677 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1679 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1680 on simulation result. But after P4 was made, no performance benefit
1681 was observed with branch hints. It also increases the code size.
1682 As a result, icc never generates branch hints. */
1685 /* X86_TUNE_DOUBLE_WITH_ADD */
1688 /* X86_TUNE_USE_SAHF */
1689 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1691 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1692 partial dependencies. */
1693 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1695 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1696 register stalls on Generic32 compilation setting as well. However
1697 in current implementation the partial register stalls are not eliminated
1698 very well - they can be introduced via subregs synthesized by combine
1699 and can happen in caller/callee saving sequences. Because this option
1700 pays back little on PPro based chips and is in conflict with partial reg
1701 dependencies used by Athlon/P4 based chips, it is better to leave it off
1702 for generic32 for now. */
1705 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1706 m_CORE2I7
| m_GENERIC
,
1708 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1709 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1710 m_CORE2I7
| m_GENERIC
,
1712 /* X86_TUNE_USE_HIMODE_FIOP */
1713 m_386
| m_486
| m_K6_GEODE
,
1715 /* X86_TUNE_USE_SIMODE_FIOP */
1716 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1718 /* X86_TUNE_USE_MOV0 */
1721 /* X86_TUNE_USE_CLTD */
1722 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
1724 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1727 /* X86_TUNE_SPLIT_LONG_MOVES */
1730 /* X86_TUNE_READ_MODIFY_WRITE */
1733 /* X86_TUNE_READ_MODIFY */
1736 /* X86_TUNE_PROMOTE_QIMODE */
1737 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1739 /* X86_TUNE_FAST_PREFIX */
1740 ~(m_386
| m_486
| m_PENT
),
1742 /* X86_TUNE_SINGLE_STRINGOP */
1743 m_386
| m_P4_NOCONA
,
1745 /* X86_TUNE_QIMODE_MATH */
1748 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1749 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1750 might be considered for Generic32 if our scheme for avoiding partial
1751 stalls was more effective. */
1754 /* X86_TUNE_PROMOTE_QI_REGS */
1757 /* X86_TUNE_PROMOTE_HI_REGS */
1760 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1761 over esp addition. */
1762 m_386
| m_486
| m_PENT
| m_PPRO
,
1764 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1765 over esp addition. */
1768 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1769 over esp subtraction. */
1770 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1772 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1773 over esp subtraction. */
1774 m_PENT
| m_K6_GEODE
,
1776 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1777 for DFmode copies */
1778 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1780 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1781 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1783 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1784 conflict here in between PPro/Pentium4 based chips that thread 128bit
1785 SSE registers as single units versus K8 based chips that divide SSE
1786 registers to two 64bit halves. This knob promotes all store destinations
1787 to be 128bit to allow register renaming on 128bit SSE units, but usually
1788 results in one extra microop on 64bit SSE units. Experimental results
1789 shows that disabling this option on P4 brings over 20% SPECfp regression,
1790 while enabling it on K8 brings roughly 2.4% regression that can be partly
1791 masked by careful scheduling of moves. */
1792 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
1794 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1795 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
1797 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1800 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1803 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1804 are resolved on SSE register parts instead of whole registers, so we may
1805 maintain just lower part of scalar values in proper format leaving the
1806 upper part undefined. */
1809 /* X86_TUNE_SSE_TYPELESS_STORES */
1812 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1813 m_PPRO
| m_P4_NOCONA
,
1815 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1816 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1818 /* X86_TUNE_PROLOGUE_USING_MOVE */
1819 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
1821 /* X86_TUNE_EPILOGUE_USING_MOVE */
1822 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
1824 /* X86_TUNE_SHIFT1 */
1827 /* X86_TUNE_USE_FFREEP */
1830 /* X86_TUNE_INTER_UNIT_MOVES */
1831 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1833 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1834 ~(m_AMDFAM10
| m_BDVER
),
1836 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1837 than 4 branch instructions in the 16 byte window. */
1838 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1840 /* X86_TUNE_SCHEDULE */
1841 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1843 /* X86_TUNE_USE_BT */
1844 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1846 /* X86_TUNE_USE_INCDEC */
1847 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
1849 /* X86_TUNE_PAD_RETURNS */
1850 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
1852 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
1855 /* X86_TUNE_EXT_80387_CONSTANTS */
1856 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
1858 /* X86_TUNE_AVOID_VECTOR_DECODE */
1859 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
1861 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1862 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1865 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1866 vector path on AMD machines. */
1867 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1869 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1871 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1873 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1877 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1878 but one byte longer. */
1881 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1882 operand that cannot be represented using a modRM byte. The XOR
1883 replacement is long decoded, so this split helps here as well. */
1886 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1888 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
1890 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1891 from integer to FP. */
1894 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1895 with a subsequent conditional jump instruction into a single
1896 compare-and-branch uop. */
1899 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1900 will impact LEA instruction selection. */
1903 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1907 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
1908 at -O3. For the moment, the prefetching seems badly tuned for Intel
1910 m_K6_GEODE
| m_AMD_MULTIPLE
,
1912 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
1913 the auto-vectorizer. */
1916 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
1917 during reassociation of integer computation. */
1920 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
1921 during reassociation of fp computation. */
1924 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
1925 regs instead of memory. */
1926 m_COREI7
| m_CORE2I7
1929 /* Feature tests against the various architecture variations. */
1930 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1932 /* Feature tests against the various architecture variations, used to create
1933 ix86_arch_features based on the processor mask. */
1934 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1935 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1936 ~(m_386
| m_486
| m_PENT
| m_K6
),
1938 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1941 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1944 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1947 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1951 static const unsigned int x86_accumulate_outgoing_args
1952 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
1954 static const unsigned int x86_arch_always_fancy_math_387
1955 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
1957 static const unsigned int x86_avx256_split_unaligned_load
1958 = m_COREI7
| m_GENERIC
;
1960 static const unsigned int x86_avx256_split_unaligned_store
1961 = m_COREI7
| m_BDVER
| m_GENERIC
;
1963 /* In case the average insn count for single function invocation is
1964 lower than this constant, emit fast (but longer) prologue and
1966 #define FAST_PROLOGUE_INSN_COUNT 20
1968 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1969 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1970 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1971 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1973 /* Array of the smallest class containing reg number REGNO, indexed by
1974 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1976 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1978 /* ax, dx, cx, bx */
1979 AREG
, DREG
, CREG
, BREG
,
1980 /* si, di, bp, sp */
1981 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1983 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1984 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1987 /* flags, fpsr, fpcr, frame */
1988 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1990 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1993 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1996 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1997 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1998 /* SSE REX registers */
1999 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2003 /* The "default" register map used in 32bit mode. */
2005 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2007 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2008 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2009 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2010 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2011 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2012 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2013 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2016 /* The "default" register map used in 64bit mode. */
2018 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2020 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2021 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2022 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2023 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2024 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2025 8,9,10,11,12,13,14,15, /* extended integer registers */
2026 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2029 /* Define the register numbers to be used in Dwarf debugging information.
2030 The SVR4 reference port C compiler uses the following register numbers
2031 in its Dwarf output code:
2032 0 for %eax (gcc regno = 0)
2033 1 for %ecx (gcc regno = 2)
2034 2 for %edx (gcc regno = 1)
2035 3 for %ebx (gcc regno = 3)
2036 4 for %esp (gcc regno = 7)
2037 5 for %ebp (gcc regno = 6)
2038 6 for %esi (gcc regno = 4)
2039 7 for %edi (gcc regno = 5)
2040 The following three DWARF register numbers are never generated by
2041 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2042 believes these numbers have these meanings.
2043 8 for %eip (no gcc equivalent)
2044 9 for %eflags (gcc regno = 17)
2045 10 for %trapno (no gcc equivalent)
2046 It is not at all clear how we should number the FP stack registers
2047 for the x86 architecture. If the version of SDB on x86/svr4 were
2048 a bit less brain dead with respect to floating-point then we would
2049 have a precedent to follow with respect to DWARF register numbers
2050 for x86 FP registers, but the SDB on x86/svr4 is so completely
2051 broken with respect to FP registers that it is hardly worth thinking
2052 of it as something to strive for compatibility with.
2053 The version of x86/svr4 SDB I have at the moment does (partially)
2054 seem to believe that DWARF register number 11 is associated with
2055 the x86 register %st(0), but that's about all. Higher DWARF
2056 register numbers don't seem to be associated with anything in
2057 particular, and even for DWARF regno 11, SDB only seems to under-
2058 stand that it should say that a variable lives in %st(0) (when
2059 asked via an `=' command) if we said it was in DWARF regno 11,
2060 but SDB still prints garbage when asked for the value of the
2061 variable in question (via a `/' command).
2062 (Also note that the labels SDB prints for various FP stack regs
2063 when doing an `x' command are all wrong.)
2064 Note that these problems generally don't affect the native SVR4
2065 C compiler because it doesn't allow the use of -O with -g and
2066 because when it is *not* optimizing, it allocates a memory
2067 location for each floating-point variable, and the memory
2068 location is what gets described in the DWARF AT_location
2069 attribute for the variable in question.
2070 Regardless of the severe mental illness of the x86/svr4 SDB, we
2071 do something sensible here and we use the following DWARF
2072 register numbers. Note that these are all stack-top-relative
2074 11 for %st(0) (gcc regno = 8)
2075 12 for %st(1) (gcc regno = 9)
2076 13 for %st(2) (gcc regno = 10)
2077 14 for %st(3) (gcc regno = 11)
2078 15 for %st(4) (gcc regno = 12)
2079 16 for %st(5) (gcc regno = 13)
2080 17 for %st(6) (gcc regno = 14)
2081 18 for %st(7) (gcc regno = 15)
2083 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2085 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2086 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2087 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2088 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2089 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2090 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2091 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2094 /* Define parameter passing and return registers. */
2096 static int const x86_64_int_parameter_registers
[6] =
2098 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2101 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2103 CX_REG
, DX_REG
, R8_REG
, R9_REG
2106 static int const x86_64_int_return_registers
[4] =
2108 AX_REG
, DX_REG
, DI_REG
, SI_REG
2111 /* Define the structure for the machine field in struct function. */
2113 struct GTY(()) stack_local_entry
{
2114 unsigned short mode
;
2117 struct stack_local_entry
*next
;
2120 /* Structure describing stack frame layout.
2121 Stack grows downward:
2127 saved static chain if ix86_static_chain_on_stack
2129 saved frame pointer if frame_pointer_needed
2130 <- HARD_FRAME_POINTER
2136 <- sse_regs_save_offset
2139 [va_arg registers] |
2143 [padding2] | = to_allocate
2152 int outgoing_arguments_size
;
2154 /* The offsets relative to ARG_POINTER. */
2155 HOST_WIDE_INT frame_pointer_offset
;
2156 HOST_WIDE_INT hard_frame_pointer_offset
;
2157 HOST_WIDE_INT stack_pointer_offset
;
2158 HOST_WIDE_INT hfp_save_offset
;
2159 HOST_WIDE_INT reg_save_offset
;
2160 HOST_WIDE_INT sse_reg_save_offset
;
2162 /* When save_regs_using_mov is set, emit prologue using
2163 move instead of push instructions. */
2164 bool save_regs_using_mov
;
2167 /* Which cpu are we scheduling for. */
2168 enum attr_cpu ix86_schedule
;
2170 /* Which cpu are we optimizing for. */
2171 enum processor_type ix86_tune
;
2173 /* Which instruction set architecture to use. */
2174 enum processor_type ix86_arch
;
2176 /* True if processor has SSE prefetch instruction. */
2177 unsigned char x86_prefetch_sse
;
2179 /* -mstackrealign option */
2180 static const char ix86_force_align_arg_pointer_string
[]
2181 = "force_align_arg_pointer";
2183 static rtx (*ix86_gen_leave
) (void);
2184 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2185 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2186 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2187 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2188 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2189 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2190 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2191 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2192 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2193 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2194 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2196 /* Preferred alignment for stack boundary in bits. */
2197 unsigned int ix86_preferred_stack_boundary
;
2199 /* Alignment for incoming stack boundary in bits specified at
2201 static unsigned int ix86_user_incoming_stack_boundary
;
2203 /* Default alignment for incoming stack boundary in bits. */
2204 static unsigned int ix86_default_incoming_stack_boundary
;
2206 /* Alignment for incoming stack boundary in bits. */
2207 unsigned int ix86_incoming_stack_boundary
;
2209 /* Calling abi specific va_list type nodes. */
2210 static GTY(()) tree sysv_va_list_type_node
;
2211 static GTY(()) tree ms_va_list_type_node
;
2213 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2214 char internal_label_prefix
[16];
2215 int internal_label_prefix_len
;
2217 /* Fence to use after loop using movnt. */
2220 /* Register class used for passing given 64bit part of the argument.
2221 These represent classes as documented by the PS ABI, with the exception
2222 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2223 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2225 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2226 whenever possible (upper half does contain padding). */
2227 enum x86_64_reg_class
2230 X86_64_INTEGER_CLASS
,
2231 X86_64_INTEGERSI_CLASS
,
2238 X86_64_COMPLEX_X87_CLASS
,
2242 #define MAX_CLASSES 4
2244 /* Table of constants used by fldpi, fldln2, etc.... */
2245 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2246 static bool ext_80387_constants_init
= 0;
2249 static struct machine_function
* ix86_init_machine_status (void);
2250 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2251 static bool ix86_function_value_regno_p (const unsigned int);
2252 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2254 static rtx
ix86_static_chain (const_tree
, bool);
2255 static int ix86_function_regparm (const_tree
, const_tree
);
2256 static void ix86_compute_frame_layout (struct ix86_frame
*);
2257 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2259 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2260 static tree
ix86_canonical_va_list_type (tree
);
2261 static void predict_jump (int);
2262 static unsigned int split_stack_prologue_scratch_regno (void);
2263 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2265 enum ix86_function_specific_strings
2267 IX86_FUNCTION_SPECIFIC_ARCH
,
2268 IX86_FUNCTION_SPECIFIC_TUNE
,
2269 IX86_FUNCTION_SPECIFIC_MAX
2272 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2273 const char *, enum fpmath_unit
, bool);
2274 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2275 static void ix86_function_specific_save (struct cl_target_option
*);
2276 static void ix86_function_specific_restore (struct cl_target_option
*);
2277 static void ix86_function_specific_print (FILE *, int,
2278 struct cl_target_option
*);
2279 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2280 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2281 struct gcc_options
*);
2282 static bool ix86_can_inline_p (tree
, tree
);
2283 static void ix86_set_current_function (tree
);
2284 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2286 static enum calling_abi
ix86_function_abi (const_tree
);
2289 #ifndef SUBTARGET32_DEFAULT_CPU
2290 #define SUBTARGET32_DEFAULT_CPU "i386"
2293 /* The svr4 ABI for the i386 says that records and unions are returned
2295 #ifndef DEFAULT_PCC_STRUCT_RETURN
2296 #define DEFAULT_PCC_STRUCT_RETURN 1
2299 /* Whether -mtune= or -march= were specified */
2300 static int ix86_tune_defaulted
;
2301 static int ix86_arch_specified
;
2303 /* Vectorization library interface and handlers. */
2304 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2306 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2307 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2309 /* Processor target table, indexed by processor number */
2312 const struct processor_costs
*cost
; /* Processor costs */
2313 const int align_loop
; /* Default alignments. */
2314 const int align_loop_max_skip
;
2315 const int align_jump
;
2316 const int align_jump_max_skip
;
2317 const int align_func
;
2320 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2322 {&i386_cost
, 4, 3, 4, 3, 4},
2323 {&i486_cost
, 16, 15, 16, 15, 16},
2324 {&pentium_cost
, 16, 7, 16, 7, 16},
2325 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2326 {&geode_cost
, 0, 0, 0, 0, 0},
2327 {&k6_cost
, 32, 7, 32, 7, 32},
2328 {&athlon_cost
, 16, 7, 16, 7, 16},
2329 {&pentium4_cost
, 0, 0, 0, 0, 0},
2330 {&k8_cost
, 16, 7, 16, 7, 16},
2331 {&nocona_cost
, 0, 0, 0, 0, 0},
2332 /* Core 2 32-bit. */
2333 {&generic32_cost
, 16, 10, 16, 10, 16},
2334 /* Core 2 64-bit. */
2335 {&generic64_cost
, 16, 10, 16, 10, 16},
2336 /* Core i7 32-bit. */
2337 {&generic32_cost
, 16, 10, 16, 10, 16},
2338 /* Core i7 64-bit. */
2339 {&generic64_cost
, 16, 10, 16, 10, 16},
2340 {&generic32_cost
, 16, 7, 16, 7, 16},
2341 {&generic64_cost
, 16, 10, 16, 10, 16},
2342 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2343 {&bdver1_cost
, 32, 24, 32, 7, 32},
2344 {&bdver2_cost
, 32, 24, 32, 7, 32},
2345 {&bdver3_cost
, 32, 24, 32, 7, 32},
2346 {&btver1_cost
, 32, 24, 32, 7, 32},
2347 {&btver2_cost
, 32, 24, 32, 7, 32},
2348 {&atom_cost
, 16, 15, 16, 7, 16}
2351 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2384 gate_insert_vzeroupper (void)
2386 return TARGET_VZEROUPPER
;
2390 rest_of_handle_insert_vzeroupper (void)
2394 /* vzeroupper instructions are inserted immediately after reload to
2395 account for possible spills from 256bit registers. The pass
2396 reuses mode switching infrastructure by re-running mode insertion
2397 pass, so disable entities that have already been processed. */
2398 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2399 ix86_optimize_mode_switching
[i
] = 0;
2401 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2403 /* Call optimize_mode_switching. */
2404 pass_mode_switching
.pass
.execute ();
2408 struct rtl_opt_pass pass_insert_vzeroupper
=
2412 "vzeroupper", /* name */
2413 OPTGROUP_NONE
, /* optinfo_flags */
2414 gate_insert_vzeroupper
, /* gate */
2415 rest_of_handle_insert_vzeroupper
, /* execute */
2418 0, /* static_pass_number */
2419 TV_NONE
, /* tv_id */
2420 0, /* properties_required */
2421 0, /* properties_provided */
2422 0, /* properties_destroyed */
2423 0, /* todo_flags_start */
2424 TODO_df_finish
| TODO_verify_rtl_sharing
|
2425 0, /* todo_flags_finish */
2429 /* Return true if a red-zone is in use. */
2432 ix86_using_red_zone (void)
2434 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2437 /* Return a string that documents the current -m options. The caller is
2438 responsible for freeing the string. */
2441 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2442 const char *tune
, enum fpmath_unit fpmath
,
2445 struct ix86_target_opts
2447 const char *option
; /* option string */
2448 HOST_WIDE_INT mask
; /* isa mask options */
2451 /* This table is ordered so that options like -msse4.2 that imply
2452 preceding options while match those first. */
2453 static struct ix86_target_opts isa_opts
[] =
2455 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2456 { "-mfma", OPTION_MASK_ISA_FMA
},
2457 { "-mxop", OPTION_MASK_ISA_XOP
},
2458 { "-mlwp", OPTION_MASK_ISA_LWP
},
2459 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2460 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2461 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2462 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2463 { "-msse3", OPTION_MASK_ISA_SSE3
},
2464 { "-msse2", OPTION_MASK_ISA_SSE2
},
2465 { "-msse", OPTION_MASK_ISA_SSE
},
2466 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2467 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2468 { "-mmmx", OPTION_MASK_ISA_MMX
},
2469 { "-mabm", OPTION_MASK_ISA_ABM
},
2470 { "-mbmi", OPTION_MASK_ISA_BMI
},
2471 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2472 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2473 { "-mhle", OPTION_MASK_ISA_HLE
},
2474 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2475 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2476 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2477 { "-madx", OPTION_MASK_ISA_ADX
},
2478 { "-mtbm", OPTION_MASK_ISA_TBM
},
2479 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2480 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2481 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2482 { "-maes", OPTION_MASK_ISA_AES
},
2483 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2484 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2485 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2486 { "-mf16c", OPTION_MASK_ISA_F16C
},
2487 { "-mrtm", OPTION_MASK_ISA_RTM
},
2488 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2489 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2493 static struct ix86_target_opts flag_opts
[] =
2495 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2496 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2497 { "-m80387", MASK_80387
},
2498 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2499 { "-malign-double", MASK_ALIGN_DOUBLE
},
2500 { "-mcld", MASK_CLD
},
2501 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2502 { "-mieee-fp", MASK_IEEE_FP
},
2503 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2504 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2505 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2506 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2507 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2508 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2509 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2510 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2511 { "-mrecip", MASK_RECIP
},
2512 { "-mrtd", MASK_RTD
},
2513 { "-msseregparm", MASK_SSEREGPARM
},
2514 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2515 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2516 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2517 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2518 { "-mvzeroupper", MASK_VZEROUPPER
},
2519 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2520 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2521 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2524 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2527 char target_other
[40];
2537 memset (opts
, '\0', sizeof (opts
));
2539 /* Add -march= option. */
2542 opts
[num
][0] = "-march=";
2543 opts
[num
++][1] = arch
;
2546 /* Add -mtune= option. */
2549 opts
[num
][0] = "-mtune=";
2550 opts
[num
++][1] = tune
;
2553 /* Add -m32/-m64/-mx32. */
2554 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2556 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2560 isa
&= ~ (OPTION_MASK_ISA_64BIT
2561 | OPTION_MASK_ABI_64
2562 | OPTION_MASK_ABI_X32
);
2566 opts
[num
++][0] = abi
;
2568 /* Pick out the options in isa options. */
2569 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2571 if ((isa
& isa_opts
[i
].mask
) != 0)
2573 opts
[num
++][0] = isa_opts
[i
].option
;
2574 isa
&= ~ isa_opts
[i
].mask
;
2578 if (isa
&& add_nl_p
)
2580 opts
[num
++][0] = isa_other
;
2581 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2585 /* Add flag options. */
2586 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2588 if ((flags
& flag_opts
[i
].mask
) != 0)
2590 opts
[num
++][0] = flag_opts
[i
].option
;
2591 flags
&= ~ flag_opts
[i
].mask
;
2595 if (flags
&& add_nl_p
)
2597 opts
[num
++][0] = target_other
;
2598 sprintf (target_other
, "(other flags: %#x)", flags
);
2601 /* Add -fpmath= option. */
2604 opts
[num
][0] = "-mfpmath=";
2605 switch ((int) fpmath
)
2608 opts
[num
++][1] = "387";
2612 opts
[num
++][1] = "sse";
2615 case FPMATH_387
| FPMATH_SSE
:
2616 opts
[num
++][1] = "sse+387";
2628 gcc_assert (num
< ARRAY_SIZE (opts
));
2630 /* Size the string. */
2632 sep_len
= (add_nl_p
) ? 3 : 1;
2633 for (i
= 0; i
< num
; i
++)
2636 for (j
= 0; j
< 2; j
++)
2638 len
+= strlen (opts
[i
][j
]);
2641 /* Build the string. */
2642 ret
= ptr
= (char *) xmalloc (len
);
2645 for (i
= 0; i
< num
; i
++)
2649 for (j
= 0; j
< 2; j
++)
2650 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2657 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2665 for (j
= 0; j
< 2; j
++)
2668 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2670 line_len
+= len2
[j
];
2675 gcc_assert (ret
+ len
>= ptr
);
2680 /* Return true, if profiling code should be emitted before
2681 prologue. Otherwise it returns false.
2682 Note: For x86 with "hotfix" it is sorried. */
/* NOTE(review): extraction fragment — the return-type line (presumably
   "static bool") and the surrounding braces are missing from this chunk.
   The visible body only tests the -mfentry flag: profiling is emitted
   before the prologue exactly when flag_fentry is nonzero.  Code below is
   kept byte-identical. */
2684 ix86_profile_before_prologue (void)
2686 return flag_fentry
!= 0;
2689 /* Function that is callable from the debugger to print the current
options.  NOTE(review): extraction fragment — several original lines are
missing here (at minimum the brace lines, the NULL check on OPTS that
selects between the two print calls, and a presumable free (opts)); code
below is kept byte-identical.  The function formats the current ISA/tune
settings via ix86_target_string and writes the result to stderr, falling
back to a "<no options>" marker — TODO confirm against the full source. */
2692 ix86_debug_options (void)
2694 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2695 ix86_arch_string
, ix86_tune_string
,
2700 fprintf (stderr
, "%s\n\n", opts
);
2704 fputs ("<no options>\n\n", stderr
);
2709 /* Override various settings based on options. If MAIN_ARGS_P, the
2710 options are from the command line, otherwise they are from
2714 ix86_option_override_internal (bool main_args_p
)
2717 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2718 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2723 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2724 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2725 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2726 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2727 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2728 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2729 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2730 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2731 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2732 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2733 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2734 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2735 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2736 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2737 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2738 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2739 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2740 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2741 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2742 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2743 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2744 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2745 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2746 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2747 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2748 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2749 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2750 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2751 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2752 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2753 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2754 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2755 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2756 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2757 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2758 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2759 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2760 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2761 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2762 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2764 /* if this reaches 64, need to widen struct pta flags below */
2768 const char *const name
; /* processor name or nickname. */
2769 const enum processor_type processor
;
2770 const enum attr_cpu schedule
;
2771 const unsigned HOST_WIDE_INT flags
;
2773 const processor_alias_table
[] =
2775 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2776 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2777 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2778 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2779 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2780 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2781 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2782 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2783 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2784 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2785 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2786 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2787 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2788 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2789 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2790 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2791 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2792 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2793 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2794 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2795 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2796 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2797 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2798 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2799 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2800 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2801 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2802 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2803 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2804 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2805 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2806 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2807 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2808 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2809 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2810 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2811 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2812 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2813 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2814 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2815 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2816 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2817 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2818 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2819 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2820 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2821 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2822 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2823 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2825 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2826 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2827 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2828 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2829 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2830 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2831 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2832 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2833 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2834 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2835 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2836 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2837 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2838 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2839 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2840 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2841 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2842 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2843 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2844 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2845 {"k8", PROCESSOR_K8
, CPU_K8
,
2846 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2847 | PTA_SSE2
| PTA_NO_SAHF
},
2848 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2849 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2850 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2851 {"opteron", PROCESSOR_K8
, CPU_K8
,
2852 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2853 | PTA_SSE2
| PTA_NO_SAHF
},
2854 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2855 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2856 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2857 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2858 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2859 | PTA_SSE2
| PTA_NO_SAHF
},
2860 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2861 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2862 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2863 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2864 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2865 | PTA_SSE2
| PTA_NO_SAHF
},
2866 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2867 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2868 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2869 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2870 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2871 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2872 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2873 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2874 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2875 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2876 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2877 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2878 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2879 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2880 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2881 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2882 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2883 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2884 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2885 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2886 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2887 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2888 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2890 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2891 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2892 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2893 | PTA_FXSR
| PTA_XSAVE
},
2894 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
2895 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2896 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2897 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2898 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2899 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2901 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
2902 PTA_HLE
/* flags are only used for -march switch. */ },
2903 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
2905 | PTA_HLE
/* flags are only used for -march switch. */ },
2908 /* -mrecip options. */
2911 const char *string
; /* option name */
2912 unsigned int mask
; /* mask bits to set */
2914 const recip_options
[] =
2916 { "all", RECIP_MASK_ALL
},
2917 { "none", RECIP_MASK_NONE
},
2918 { "div", RECIP_MASK_DIV
},
2919 { "sqrt", RECIP_MASK_SQRT
},
2920 { "vec-div", RECIP_MASK_VEC_DIV
},
2921 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
2924 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
2926 /* Set up prefix/suffix so the error messages refer to either the command
2927 line argument, or the attribute(target). */
2936 prefix
= "option(\"";
2941 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
2942 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
2943 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
2944 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
2945 #ifdef TARGET_BI_ARCH
2948 #if TARGET_BI_ARCH == 1
2949 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
2950 is on and OPTION_MASK_ABI_X32 is off. We turn off
2951 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
2954 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
2956 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
2957 on and OPTION_MASK_ABI_64 is off. We turn off
2958 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
2961 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
2968 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
2969 OPTION_MASK_ABI_64 for TARGET_X32. */
2970 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
2971 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
2973 else if (TARGET_LP64
)
2975 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
2976 OPTION_MASK_ABI_X32 for TARGET_LP64. */
2977 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
2978 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
2981 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2982 SUBTARGET_OVERRIDE_OPTIONS
;
2985 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2986 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2989 /* -fPIC is the default for x86_64. */
2990 if (TARGET_MACHO
&& TARGET_64BIT
)
2993 /* Need to check -mtune=generic first. */
2994 if (ix86_tune_string
)
2996 if (!strcmp (ix86_tune_string
, "generic")
2997 || !strcmp (ix86_tune_string
, "i686")
2998 /* As special support for cross compilers we read -mtune=native
2999 as -mtune=generic. With native compilers we won't see the
3000 -mtune=native, as it was changed by the driver. */
3001 || !strcmp (ix86_tune_string
, "native"))
3004 ix86_tune_string
= "generic64";
3006 ix86_tune_string
= "generic32";
3008 /* If this call is for setting the option attribute, allow the
3009 generic32/generic64 that was previously set. */
3010 else if (!main_args_p
3011 && (!strcmp (ix86_tune_string
, "generic32")
3012 || !strcmp (ix86_tune_string
, "generic64")))
3014 else if (!strncmp (ix86_tune_string
, "generic", 7))
3015 error ("bad value (%s) for %stune=%s %s",
3016 ix86_tune_string
, prefix
, suffix
, sw
);
3017 else if (!strcmp (ix86_tune_string
, "x86-64"))
3018 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3019 "%stune=k8%s or %stune=generic%s instead as appropriate",
3020 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3024 if (ix86_arch_string
)
3025 ix86_tune_string
= ix86_arch_string
;
3026 if (!ix86_tune_string
)
3028 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3029 ix86_tune_defaulted
= 1;
3032 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3033 need to use a sensible tune option. */
3034 if (!strcmp (ix86_tune_string
, "generic")
3035 || !strcmp (ix86_tune_string
, "x86-64")
3036 || !strcmp (ix86_tune_string
, "i686"))
3039 ix86_tune_string
= "generic64";
3041 ix86_tune_string
= "generic32";
3045 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3047 /* rep; movq isn't available in 32-bit code. */
3048 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3049 ix86_stringop_alg
= no_stringop
;
3052 if (!ix86_arch_string
)
3053 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3055 ix86_arch_specified
= 1;
3057 if (global_options_set
.x_ix86_pmode
)
3059 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3060 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3061 error ("address mode %qs not supported in the %s bit mode",
3062 TARGET_64BIT
? "short" : "long",
3063 TARGET_64BIT
? "64" : "32");
3066 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3068 if (!global_options_set
.x_ix86_abi
)
3069 ix86_abi
= DEFAULT_ABI
;
3071 if (global_options_set
.x_ix86_cmodel
)
3073 switch (ix86_cmodel
)
3078 ix86_cmodel
= CM_SMALL_PIC
;
3080 error ("code model %qs not supported in the %s bit mode",
3087 ix86_cmodel
= CM_MEDIUM_PIC
;
3089 error ("code model %qs not supported in the %s bit mode",
3091 else if (TARGET_X32
)
3092 error ("code model %qs not supported in x32 mode",
3099 ix86_cmodel
= CM_LARGE_PIC
;
3101 error ("code model %qs not supported in the %s bit mode",
3103 else if (TARGET_X32
)
3104 error ("code model %qs not supported in x32 mode",
3110 error ("code model %s does not support PIC mode", "32");
3112 error ("code model %qs not supported in the %s bit mode",
3119 error ("code model %s does not support PIC mode", "kernel");
3120 ix86_cmodel
= CM_32
;
3123 error ("code model %qs not supported in the %s bit mode",
3133 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3134 use of rip-relative addressing. This eliminates fixups that
3135 would otherwise be needed if this object is to be placed in a
3136 DLL, and is essentially just as efficient as direct addressing. */
3137 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3138 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3139 else if (TARGET_64BIT
)
3140 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3142 ix86_cmodel
= CM_32
;
3144 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3146 error ("-masm=intel not supported in this configuration");
3147 ix86_asm_dialect
= ASM_ATT
;
3149 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3150 sorry ("%i-bit mode not compiled in",
3151 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3153 for (i
= 0; i
< pta_size
; i
++)
3154 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3156 ix86_schedule
= processor_alias_table
[i
].schedule
;
3157 ix86_arch
= processor_alias_table
[i
].processor
;
3158 /* Default cpu tuning to the architecture. */
3159 ix86_tune
= ix86_arch
;
3161 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3162 error ("CPU you selected does not support x86-64 "
3165 if (processor_alias_table
[i
].flags
& PTA_MMX
3166 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3167 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3168 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3169 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3170 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3171 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3172 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3173 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3174 if (processor_alias_table
[i
].flags
& PTA_SSE
3175 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3176 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3177 if (processor_alias_table
[i
].flags
& PTA_SSE2
3178 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3179 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3180 if (processor_alias_table
[i
].flags
& PTA_SSE3
3181 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3182 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3183 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3184 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3185 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3186 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3187 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3188 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3189 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3190 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3191 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3192 if (processor_alias_table
[i
].flags
& PTA_AVX
3193 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3194 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3195 if (processor_alias_table
[i
].flags
& PTA_AVX2
3196 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3197 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3198 if (processor_alias_table
[i
].flags
& PTA_FMA
3199 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3200 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3201 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3202 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3203 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3204 if (processor_alias_table
[i
].flags
& PTA_FMA4
3205 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3206 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3207 if (processor_alias_table
[i
].flags
& PTA_XOP
3208 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3209 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3210 if (processor_alias_table
[i
].flags
& PTA_LWP
3211 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3212 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3213 if (processor_alias_table
[i
].flags
& PTA_ABM
3214 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3215 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3216 if (processor_alias_table
[i
].flags
& PTA_BMI
3217 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3218 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3219 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3220 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3221 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3222 if (processor_alias_table
[i
].flags
& PTA_TBM
3223 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3224 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3225 if (processor_alias_table
[i
].flags
& PTA_BMI2
3226 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3227 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3228 if (processor_alias_table
[i
].flags
& PTA_CX16
3229 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3230 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3231 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3232 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3233 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3234 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3235 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3236 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3237 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3238 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3239 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3240 if (processor_alias_table
[i
].flags
& PTA_AES
3241 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3242 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3243 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3244 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3245 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3246 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3247 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3248 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3249 if (processor_alias_table
[i
].flags
& PTA_RDRND
3250 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3251 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3252 if (processor_alias_table
[i
].flags
& PTA_F16C
3253 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3254 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3255 if (processor_alias_table
[i
].flags
& PTA_RTM
3256 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3257 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3258 if (processor_alias_table
[i
].flags
& PTA_HLE
3259 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3260 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3261 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3262 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3263 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3264 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3265 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3266 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3267 if (processor_alias_table
[i
].flags
& PTA_ADX
3268 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3269 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3270 if (processor_alias_table
[i
].flags
& PTA_FXSR
3271 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3272 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3273 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3274 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3275 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3276 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3277 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3278 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3279 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3280 x86_prefetch_sse
= true;
3285 if (!strcmp (ix86_arch_string
, "generic"))
3286 error ("generic CPU can be used only for %stune=%s %s",
3287 prefix
, suffix
, sw
);
3288 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3289 error ("bad value (%s) for %sarch=%s %s",
3290 ix86_arch_string
, prefix
, suffix
, sw
);
3292 ix86_arch_mask
= 1u << ix86_arch
;
3293 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3294 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3296 for (i
= 0; i
< pta_size
; i
++)
3297 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3299 ix86_schedule
= processor_alias_table
[i
].schedule
;
3300 ix86_tune
= processor_alias_table
[i
].processor
;
3303 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3305 if (ix86_tune_defaulted
)
3307 ix86_tune_string
= "x86-64";
3308 for (i
= 0; i
< pta_size
; i
++)
3309 if (! strcmp (ix86_tune_string
,
3310 processor_alias_table
[i
].name
))
3312 ix86_schedule
= processor_alias_table
[i
].schedule
;
3313 ix86_tune
= processor_alias_table
[i
].processor
;
3316 error ("CPU you selected does not support x86-64 "
3322 /* Adjust tuning when compiling for 32-bit ABI. */
3325 case PROCESSOR_GENERIC64
:
3326 ix86_tune
= PROCESSOR_GENERIC32
;
3327 ix86_schedule
= CPU_PENTIUMPRO
;
3330 case PROCESSOR_CORE2_64
:
3331 ix86_tune
= PROCESSOR_CORE2_32
;
3334 case PROCESSOR_COREI7_64
:
3335 ix86_tune
= PROCESSOR_COREI7_32
;
3342 /* Intel CPUs have always interpreted SSE prefetch instructions as
3343 NOPs; so, we can enable SSE prefetch instructions even when
3344 -mtune (rather than -march) points us to a processor that has them.
3345 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3346 higher processors. */
3348 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3349 x86_prefetch_sse
= true;
3353 if (ix86_tune_specified
&& i
== pta_size
)
3354 error ("bad value (%s) for %stune=%s %s",
3355 ix86_tune_string
, prefix
, suffix
, sw
);
3357 ix86_tune_mask
= 1u << ix86_tune
;
3358 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3359 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3361 #ifndef USE_IX86_FRAME_POINTER
3362 #define USE_IX86_FRAME_POINTER 0
3365 #ifndef USE_X86_64_FRAME_POINTER
3366 #define USE_X86_64_FRAME_POINTER 0
3369 /* Set the default values for switches whose default depends on TARGET_64BIT
3370 in case they weren't overwritten by command line options. */
3373 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3374 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3375 if (flag_asynchronous_unwind_tables
== 2)
3376 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3377 if (flag_pcc_struct_return
== 2)
3378 flag_pcc_struct_return
= 0;
3382 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3383 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3384 if (flag_asynchronous_unwind_tables
== 2)
3385 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3386 if (flag_pcc_struct_return
== 2)
3387 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3390 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3392 ix86_cost
= &ix86_size_cost
;
3394 ix86_cost
= ix86_tune_cost
;
3396 /* Arrange to set up i386_stack_locals for all functions. */
3397 init_machine_status
= ix86_init_machine_status
;
3399 /* Validate -mregparm= value. */
3400 if (global_options_set
.x_ix86_regparm
)
3403 warning (0, "-mregparm is ignored in 64-bit mode");
3404 if (ix86_regparm
> REGPARM_MAX
)
3406 error ("-mregparm=%d is not between 0 and %d",
3407 ix86_regparm
, REGPARM_MAX
);
3412 ix86_regparm
= REGPARM_MAX
;
3414 /* Default align_* from the processor table. */
3415 if (align_loops
== 0)
3417 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3418 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3420 if (align_jumps
== 0)
3422 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3423 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3425 if (align_functions
== 0)
3427 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3430 /* Provide default for -mbranch-cost= value. */
3431 if (!global_options_set
.x_ix86_branch_cost
)
3432 ix86_branch_cost
= ix86_cost
->branch_cost
;
3436 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3438 /* Enable by default the SSE and MMX builtins. Do allow the user to
3439 explicitly disable any of these. In particular, disabling SSE and
3440 MMX for kernel code is extremely useful. */
3441 if (!ix86_arch_specified
)
3443 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3444 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3447 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3451 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3453 if (!ix86_arch_specified
)
3455 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3457 /* i386 ABI does not specify red zone. It still makes sense to use it
3458 when programmer takes care to stack from being destroyed. */
3459 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3460 target_flags
|= MASK_NO_RED_ZONE
;
3463 /* Keep nonleaf frame pointers. */
3464 if (flag_omit_frame_pointer
)
3465 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3466 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3467 flag_omit_frame_pointer
= 1;
3469 /* If we're doing fast math, we don't care about comparison order
3470 wrt NaNs. This lets us use a shorter comparison sequence. */
3471 if (flag_finite_math_only
)
3472 target_flags
&= ~MASK_IEEE_FP
;
3474 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3475 since the insns won't need emulation. */
3476 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3477 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3479 /* Likewise, if the target doesn't have a 387, or we've specified
3480 software floating point, don't use 387 inline intrinsics. */
3482 target_flags
|= MASK_NO_FANCY_MATH_387
;
3484 /* Turn on MMX builtins for -msse. */
3486 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3488 /* Enable SSE prefetch. */
3489 if (TARGET_SSE
|| TARGET_PRFCHW
)
3490 x86_prefetch_sse
= true;
3492 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3493 if (TARGET_SSE4_2
|| TARGET_ABM
)
3494 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3496 /* Turn on lzcnt instruction for -mabm. */
3498 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3500 /* Validate -mpreferred-stack-boundary= value or default it to
3501 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3502 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3503 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3505 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3506 int max
= (TARGET_SEH
? 4 : 12);
3508 if (ix86_preferred_stack_boundary_arg
< min
3509 || ix86_preferred_stack_boundary_arg
> max
)
3512 error ("-mpreferred-stack-boundary is not supported "
3515 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3516 ix86_preferred_stack_boundary_arg
, min
, max
);
3519 ix86_preferred_stack_boundary
3520 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3523 /* Set the default value for -mstackrealign. */
3524 if (ix86_force_align_arg_pointer
== -1)
3525 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3527 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3529 /* Validate -mincoming-stack-boundary= value or default it to
3530 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3531 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3532 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3534 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3535 || ix86_incoming_stack_boundary_arg
> 12)
3536 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3537 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3540 ix86_user_incoming_stack_boundary
3541 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3542 ix86_incoming_stack_boundary
3543 = ix86_user_incoming_stack_boundary
;
3547 /* Accept -msseregparm only if at least SSE support is enabled. */
3548 if (TARGET_SSEREGPARM
3550 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3552 if (global_options_set
.x_ix86_fpmath
)
3554 if (ix86_fpmath
& FPMATH_SSE
)
3558 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3559 ix86_fpmath
= FPMATH_387
;
3561 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3563 warning (0, "387 instruction set disabled, using SSE arithmetics");
3564 ix86_fpmath
= FPMATH_SSE
;
3569 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3571 /* If the i387 is disabled, then do not return values in it. */
3573 target_flags
&= ~MASK_FLOAT_RETURNS
;
3575 /* Use external vectorized library in vectorizing intrinsics. */
3576 if (global_options_set
.x_ix86_veclibabi_type
)
3577 switch (ix86_veclibabi_type
)
3579 case ix86_veclibabi_type_svml
:
3580 ix86_veclib_handler
= ix86_veclibabi_svml
;
3583 case ix86_veclibabi_type_acml
:
3584 ix86_veclib_handler
= ix86_veclibabi_acml
;
3591 if ((!USE_IX86_FRAME_POINTER
3592 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3593 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3595 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3597 /* ??? Unwind info is not correct around the CFG unless either a frame
3598 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3599 unwind info generation to be aware of the CFG and propagating states
3601 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3602 || flag_exceptions
|| flag_non_call_exceptions
)
3603 && flag_omit_frame_pointer
3604 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3606 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3607 warning (0, "unwind tables currently require either a frame pointer "
3608 "or %saccumulate-outgoing-args%s for correctness",
3610 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3613 /* If stack probes are required, the space used for large function
3614 arguments on the stack must also be probed, so enable
3615 -maccumulate-outgoing-args so this happens in the prologue. */
3616 if (TARGET_STACK_PROBE
3617 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3619 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3620 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3621 "for correctness", prefix
, suffix
);
3622 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3625 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3628 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3629 p
= strchr (internal_label_prefix
, 'X');
3630 internal_label_prefix_len
= p
- internal_label_prefix
;
3634 /* When scheduling description is not available, disable scheduler pass
3635 so it won't slow down the compilation and make x87 code slower. */
3636 if (!TARGET_SCHEDULE
)
3637 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3639 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3640 ix86_tune_cost
->simultaneous_prefetches
,
3641 global_options
.x_param_values
,
3642 global_options_set
.x_param_values
);
3643 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3644 ix86_tune_cost
->prefetch_block
,
3645 global_options
.x_param_values
,
3646 global_options_set
.x_param_values
);
3647 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3648 ix86_tune_cost
->l1_cache_size
,
3649 global_options
.x_param_values
,
3650 global_options_set
.x_param_values
);
3651 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3652 ix86_tune_cost
->l2_cache_size
,
3653 global_options
.x_param_values
,
3654 global_options_set
.x_param_values
);
3656 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3657 if (flag_prefetch_loop_arrays
< 0
3659 && (optimize
>= 3 || flag_profile_use
)
3660 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3661 flag_prefetch_loop_arrays
= 1;
3663 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3664 can be optimized to ap = __builtin_next_arg (0). */
3665 if (!TARGET_64BIT
&& !flag_split_stack
)
3666 targetm
.expand_builtin_va_start
= NULL
;
3670 ix86_gen_leave
= gen_leave_rex64
;
3671 if (Pmode
== DImode
)
3673 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3674 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3675 ix86_gen_tls_local_dynamic_base_64
3676 = gen_tls_local_dynamic_base_64_di
;
3680 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3681 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3682 ix86_gen_tls_local_dynamic_base_64
3683 = gen_tls_local_dynamic_base_64_si
;
3688 ix86_gen_leave
= gen_leave
;
3689 ix86_gen_monitor
= gen_sse3_monitor
;
3692 if (Pmode
== DImode
)
3694 ix86_gen_add3
= gen_adddi3
;
3695 ix86_gen_sub3
= gen_subdi3
;
3696 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3697 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3698 ix86_gen_andsp
= gen_anddi3
;
3699 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3700 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3701 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3705 ix86_gen_add3
= gen_addsi3
;
3706 ix86_gen_sub3
= gen_subsi3
;
3707 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3708 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3709 ix86_gen_andsp
= gen_andsi3
;
3710 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3711 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3712 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3716 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3718 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3721 if (!TARGET_64BIT
&& flag_pic
)
3723 if (flag_fentry
> 0)
3724 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3728 else if (TARGET_SEH
)
3730 if (flag_fentry
== 0)
3731 sorry ("-mno-fentry isn%'t compatible with SEH");
3734 else if (flag_fentry
< 0)
3736 #if defined(PROFILE_BEFORE_PROLOGUE)
3745 /* When not optimize for size, enable vzeroupper optimization for
3746 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3747 AVX unaligned load/store. */
3750 if (flag_expensive_optimizations
3751 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3752 target_flags
|= MASK_VZEROUPPER
;
3753 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3754 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3755 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3756 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3757 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3758 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3759 /* Enable 128-bit AVX instruction generation
3760 for the auto-vectorizer. */
3761 if (TARGET_AVX128_OPTIMAL
3762 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3763 target_flags
|= MASK_PREFER_AVX128
;
3768 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3769 target_flags
&= ~MASK_VZEROUPPER
;
3772 if (ix86_recip_name
)
3774 char *p
= ASTRDUP (ix86_recip_name
);
3776 unsigned int mask
, i
;
3779 while ((q
= strtok (p
, ",")) != NULL
)
3790 if (!strcmp (q
, "default"))
3791 mask
= RECIP_MASK_ALL
;
3794 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3795 if (!strcmp (q
, recip_options
[i
].string
))
3797 mask
= recip_options
[i
].mask
;
3801 if (i
== ARRAY_SIZE (recip_options
))
3803 error ("unknown option for -mrecip=%s", q
);
3805 mask
= RECIP_MASK_NONE
;
3809 recip_mask_explicit
|= mask
;
3811 recip_mask
&= ~mask
;
3818 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3819 else if (target_flags_explicit
& MASK_RECIP
)
3820 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3822 /* Default long double to 64-bit for Bionic. */
3823 if (TARGET_HAS_BIONIC
3824 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
3825 target_flags
|= MASK_LONG_DOUBLE_64
;
3827 /* Save the initial options in case the user does function specific
3830 target_option_default_node
= target_option_current_node
3831 = build_target_option_node ();
3834 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* NOTE(review): this chunk is a lossy extraction — statements are split across
   lines and some original lines (signature, braces) are missing.  Code below is
   preserved byte-for-byte; only comments were added.  */
3837 ix86_option_override (void)
/* Register the vzeroupper insertion pass to run after reload, then do the
   real option processing in ix86_option_override_internal.  */
3839 static struct register_pass_info insert_vzeroupper_info
3840 = { &pass_insert_vzeroupper
.pass
, "reload",
3841 1, PASS_POS_INSERT_AFTER
3844 ix86_option_override_internal (true);
3847 /* This needs to be done at start up. It's convenient to do it here. */
3848 register_pass (&insert_vzeroupper_info
);
3851 /* Update register usage after having seen the compiler flags. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  This hook marks registers fixed/call-used depending on the selected
   target flags (PIC, 64-bit, MMX/SSE/x87 availability).  */
3854 ix86_conditional_register_usage (void)
3859 /* The PIC register, if it exists, is fixed. */
3860 j
= PIC_OFFSET_TABLE_REGNUM
;
3861 if (j
!= INVALID_REGNUM
)
3862 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3864 /* For 32-bit targets, squash the REX registers. */
/* Marking a register fixed+call-used and blanking its name removes it from
   allocation entirely.  */
3867 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3868 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3869 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3870 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3873 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* c_mask selects which bit of the multi-ABI CALL_USED_REGISTERS initializer
   applies: MS 64-bit ABI, generic 64-bit, or 32-bit.  */
3874 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
3875 : TARGET_64BIT
? (1 << 2)
3878 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
3880 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3882 /* Set/reset conditionally defined registers from
3883 CALL_USED_REGISTERS initializer. */
3884 if (call_used_regs
[i
] > 1)
3885 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
3887 /* Calculate registers of CLOBBERED_REGS register set
3888 as call used registers from GENERAL_REGS register set. */
3889 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3890 && call_used_regs
[i
])
3891 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3894 /* If MMX is disabled, squash the registers. */
3896 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3897 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3898 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3900 /* If SSE is disabled, squash the registers. */
3902 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3903 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3904 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3906 /* If the FPU is disabled, squash the registers. */
3907 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
3908 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3909 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
3910 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3914 /* Save the current options */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Copies the current global target state into PTR (a cl_target_option
   save area) so per-function target attributes can be restored later.  */
3917 ix86_function_specific_save (struct cl_target_option
*ptr
)
3919 ptr
->arch
= ix86_arch
;
3920 ptr
->schedule
= ix86_schedule
;
3921 ptr
->tune
= ix86_tune
;
3922 ptr
->branch_cost
= ix86_branch_cost
;
3923 ptr
->tune_defaulted
= ix86_tune_defaulted
;
3924 ptr
->arch_specified
= ix86_arch_specified
;
3925 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
3926 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
3927 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
3929 /* The fields are char but the variables are not; make sure the
3930 values fit in the fields. */
/* The asserts catch silent narrowing if an enum value ever exceeds the
   char-sized save-structure field.  */
3931 gcc_assert (ptr
->arch
== ix86_arch
);
3932 gcc_assert (ptr
->schedule
== ix86_schedule
);
3933 gcc_assert (ptr
->tune
== ix86_tune
);
3934 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
3937 /* Restore the current options */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Inverse of ix86_function_specific_save: loads target state from PTR
   back into the globals, then rebuilds the cached arch/tune feature arrays if
   the arch or tune actually changed.  */
3940 ix86_function_specific_restore (struct cl_target_option
*ptr
)
3942 enum processor_type old_tune
= ix86_tune
;
3943 enum processor_type old_arch
= ix86_arch
;
3944 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3947 ix86_arch
= (enum processor_type
) ptr
->arch
;
3948 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
3949 ix86_tune
= (enum processor_type
) ptr
->tune
;
3950 ix86_branch_cost
= ptr
->branch_cost
;
3951 ix86_tune_defaulted
= ptr
->tune_defaulted
;
3952 ix86_arch_specified
= ptr
->arch_specified
;
3953 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
3954 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
3955 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
3957 /* Recreate the arch feature tests if the arch changed */
3958 if (old_arch
!= ix86_arch
)
/* Each feature array entry is a boolean: does the initial feature mask for
   that feature include the current arch/tune bit?  */
3960 ix86_arch_mask
= 1u << ix86_arch
;
3961 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3962 ix86_arch_features
[i
]
3963 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3966 /* Recreate the tune optimization tests */
3967 if (old_tune
!= ix86_tune
)
3969 ix86_tune_mask
= 1u << ix86_tune
;
3970 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3971 ix86_tune_features
[i
]
3972 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3976 /* Print the current options */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Dumps a saved cl_target_option to FILE for debugging, indented by
   INDENT columns; frees the string built by ix86_target_string.  */
3979 ix86_function_specific_print (FILE *file
, int indent
,
3980 struct cl_target_option
*ptr
)
3983 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
3984 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
/* Guard against out-of-range enum values before indexing cpu_names.  */
3986 fprintf (file
, "%*sarch = %d (%s)\n",
3989 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
3990 ? cpu_names
[ptr
->arch
]
3993 fprintf (file
, "%*stune = %d (%s)\n",
3996 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
3997 ? cpu_names
[ptr
->tune
]
4000 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4004 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4005 free (target_string
);
4010 /* Inner function to process the attribute((target(...))), take an argument and
4011 set the current options from the argument. If we have a list, recursively go
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Parses one attribute((target("..."))) argument: a TREE_LIST is
   processed recursively, a STRING_CST is split on commas and each token is
   matched against the attrs[] table built from the IX86_ATTR_* macros.  */
4015 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4016 struct gcc_options
*enum_opts_set
)
/* Table-entry builders: (string, strlen, kind, option enum, mask).  */
4021 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4022 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4023 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4024 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4025 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4041 enum ix86_opt_type type
;
/* ISA toggles understood inside attribute((target(...))).  */
4046 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4047 IX86_ATTR_ISA ("abm", OPT_mabm
),
4048 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4049 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4050 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4051 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4052 IX86_ATTR_ISA ("aes", OPT_maes
),
4053 IX86_ATTR_ISA ("avx", OPT_mavx
),
4054 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4055 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4056 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4057 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4058 IX86_ATTR_ISA ("sse", OPT_msse
),
4059 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4060 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4061 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4062 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4063 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4064 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4065 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4066 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4067 IX86_ATTR_ISA ("fma", OPT_mfma
),
4068 IX86_ATTR_ISA ("xop", OPT_mxop
),
4069 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4070 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4071 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4072 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4073 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4074 IX86_ATTR_ISA ("hle", OPT_mhle
),
4075 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4076 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4077 IX86_ATTR_ISA ("adx", OPT_madx
),
4078 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4079 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4080 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
/* enums */
4083 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4085 /* string options */
4086 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4087 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
/* flag options (target_flags mask toggles) */
4090 IX86_ATTR_YES ("cld",
4094 IX86_ATTR_NO ("fancy-math-387",
4095 OPT_mfancy_math_387
,
4096 MASK_NO_FANCY_MATH_387
),
4098 IX86_ATTR_YES ("ieee-fp",
4102 IX86_ATTR_YES ("inline-all-stringops",
4103 OPT_minline_all_stringops
,
4104 MASK_INLINE_ALL_STRINGOPS
),
4106 IX86_ATTR_YES ("inline-stringops-dynamically",
4107 OPT_minline_stringops_dynamically
,
4108 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4110 IX86_ATTR_NO ("align-stringops",
4111 OPT_mno_align_stringops
,
4112 MASK_NO_ALIGN_STRINGOPS
),
4114 IX86_ATTR_YES ("recip",
4120 /* If this is a list, recurse to get the options. */
4121 if (TREE_CODE (args
) == TREE_LIST
)
4125 for (; args
; args
= TREE_CHAIN (args
))
4126 if (TREE_VALUE (args
)
4127 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4128 p_strings
, enum_opts_set
))
4134 else if (TREE_CODE (args
) != STRING_CST
)
4137 /* Handle multiple arguments separated by commas. */
4138 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4140 while (next_optstr
&& *next_optstr
!= '\0')
4142 char *p
= next_optstr
;
4144 char *comma
= strchr (next_optstr
, ',');
4145 const char *opt_string
;
4146 size_t len
, opt_len
;
4151 enum ix86_opt_type type
= ix86_opt_unknown
;
/* Split off the token before the comma; advance past it.  */
4157 len
= comma
- next_optstr
;
4158 next_optstr
= comma
+ 1;
4166 /* Recognize no-xxx. */
4167 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4176 /* Find the option. */
/* Linear scan of the attrs table; first-character check is a cheap
   pre-filter before memcmp.  */
4179 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4181 type
= attrs
[i
].type
;
4182 opt_len
= attrs
[i
].len
;
4183 if (ch
== attrs
[i
].string
[0]
4184 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4187 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4190 mask
= attrs
[i
].mask
;
4191 opt_string
= attrs
[i
].string
;
4196 /* Process the option. */
4199 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4203 else if (type
== ix86_opt_isa
)
/* ISA options go through the regular option machinery so dependent
   ISA flags are updated consistently.  */
4205 struct cl_decoded_option decoded
;
4207 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4208 ix86_handle_option (&global_options
, &global_options_set
,
4209 &decoded
, input_location
);
4212 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4214 if (type
== ix86_opt_no
)
4215 opt_set_p
= !opt_set_p
;
4218 target_flags
|= mask
;
4220 target_flags
&= ~mask
;
4223 else if (type
== ix86_opt_str
)
4227 error ("option(\"%s\") was already specified", opt_string
);
/* Ownership note: the xstrdup'd string is freed by the caller
   (see ix86_valid_target_attribute_tree).  */
4231 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4234 else if (type
== ix86_opt_enum
)
4239 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4241 set_option (&global_options
, enum_opts_set
, opt
, value
,
4242 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4246 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4258 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Saves the current arch/tune/fpmath state, applies the attribute
   options via ix86_valid_target_attribute_inner_p, reruns the option override
   if anything changed, builds a target-option tree node, then restores the
   saved globals and frees the option strings.  */
4261 ix86_valid_target_attribute_tree (tree args
)
4263 const char *orig_arch_string
= ix86_arch_string
;
4264 const char *orig_tune_string
= ix86_tune_string
;
4265 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4266 int orig_tune_defaulted
= ix86_tune_defaulted
;
4267 int orig_arch_specified
= ix86_arch_specified
;
4268 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4271 struct cl_target_option
*def
4272 = TREE_TARGET_OPTION (target_option_default_node
);
4273 struct gcc_options enum_opts_set
;
4275 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4277 /* Process each of the options on the chain. */
4278 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4282 /* If the changed options are different from the default, rerun
4283 ix86_option_override_internal, and then save the options away.
4284 The string options are are attribute options, and will be undone
4285 when we copy the save structure. */
4286 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4287 || target_flags
!= def
->x_target_flags
4288 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4289 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4290 || enum_opts_set
.x_ix86_fpmath
)
4292 /* If we are using the default tune= or arch=, undo the string assigned,
4293 and use the default. */
4294 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4295 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4296 else if (!orig_arch_specified
)
4297 ix86_arch_string
= NULL
;
4299 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4300 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4301 else if (orig_tune_defaulted
)
4302 ix86_tune_string
= NULL
;
4304 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4305 if (enum_opts_set
.x_ix86_fpmath
)
4306 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4307 else if (!TARGET_64BIT
&& TARGET_SSE
)
4309 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4310 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4313 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4314 ix86_option_override_internal (false);
4316 /* Add any builtin functions with the new isa if any. */
4317 ix86_add_new_builtins (ix86_isa_flags
);
4319 /* Save the current options unless we are validating options for
4321 t
= build_target_option_node ();
/* Restore the caller-visible globals we shadowed above.  */
4323 ix86_arch_string
= orig_arch_string
;
4324 ix86_tune_string
= orig_tune_string
;
4325 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4327 /* Free up memory allocated to hold the strings */
4328 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4329 free (option_strings
[i
]);
4335 /* Hook to validate attribute((target("string"))). */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Validates and installs a target attribute on FNDECL: applies any
   function-specific optimization options first, builds the new target node,
   records it on the decl, then restores the global option state.  */
4338 ix86_valid_target_attribute_p (tree fndecl
,
4339 tree
ARG_UNUSED (name
),
4341 int ARG_UNUSED (flags
))
4343 struct cl_target_option cur_target
;
4345 tree old_optimize
= build_optimization_node ();
4346 tree new_target
, new_optimize
;
4347 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4349 /* If the function changed the optimization levels as well as setting target
4350 options, start with the optimizations specified. */
4351 if (func_optimize
&& func_optimize
!= old_optimize
)
4352 cl_optimization_restore (&global_options
,
4353 TREE_OPTIMIZATION (func_optimize
))
;
4355 /* The target attributes may also change some optimization flags, so update
4356 the optimization options if necessary. */
4357 cl_target_option_save (&cur_target
, &global_options
);
4358 new_target
= ix86_valid_target_attribute_tree (args
);
4359 new_optimize
= build_optimization_node ();
4366 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4368 if (old_optimize
!= new_optimize
)
4369 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
/* Undo the temporary option changes before returning to the caller.  */
4372 cl_target_option_restore (&global_options
, &cur_target
);
4374 if (old_optimize
!= new_optimize
)
4375 cl_optimization_restore (&global_options
,
4376 TREE_OPTIMIZATION (old_optimize
));
4382 /* Hook to determine if one function can safely inline another. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Compares the per-function target options of CALLER and CALLEE;
   inlining is rejected when the callee needs ISA features, flags, arch/tune,
   fpmath or branch-cost settings the caller does not share.  */
4385 ix86_can_inline_p (tree caller
, tree callee
)
4388 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4389 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4391 /* If callee has no option attributes, then it is ok to inline. */
4395 /* If caller has no option attributes, but callee does then it is not ok to
4397 else if (!caller_tree
)
4402 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4403 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4405 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4406 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4408 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4409 != callee_opts
->x_ix86_isa_flags
)
4412 /* See if we have the same non-isa options. */
4413 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4416 /* See if arch, tune, etc. are the same. */
4417 else if (caller_opts
->arch
!= callee_opts
->arch
)
4420 else if (caller_opts
->tune
!= callee_opts
->tune
)
4423 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4426 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4437 /* Remember the last target of ix86_set_current_function. */
4438 static GTY(()) tree ix86_previous_fndecl
;
4440 /* Establish appropriate back-end context for processing the function
4441 FNDECL. The argument might be NULL to indicate processing at top
4442 level, outside of any function scope. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Caches the previous fndecl so repeated calls for the same function
   avoid re-restoring target options.  */
4444 ix86_set_current_function (tree fndecl
)
4446 /* Only change the context if the function changes. This hook is called
4447 several times in the course of compiling a function, and we don't want to
4448 slow things down too much or call target_reinit when it isn't safe. */
4449 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4451 tree old_tree
= (ix86_previous_fndecl
4452 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4455 tree new_tree
= (fndecl
4456 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4459 ix86_previous_fndecl
= fndecl
;
4460 if (old_tree
== new_tree
)
/* Target options differ: restore either the new function's options or
   the current defaults into global_options.  */
4465 cl_target_option_restore (&global_options
,
4466 TREE_TARGET_OPTION (new_tree
));
4472 struct cl_target_option
*def
4473 = TREE_TARGET_OPTION (target_option_current_node
);
4475 cl_target_option_restore (&global_options
, def
);
4482 /* Return true if this goes in large data/bss. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Only meaningful for the medium code models; decides by explicit
   .ldata/.lbss section name or by size against ix86_section_threshold.  */
4485 ix86_in_large_data_p (tree exp
)
4487 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4490 /* Functions are never large data. */
4491 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4494 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4496 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4497 if (strcmp (section
, ".ldata") == 0
4498 || strcmp (section
, ".lbss") == 0)
4504 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4506 /* If this is an incomplete type with size 0, then we can't put it
4507 in data because it might be too big when completed. */
4508 if (!size
|| size
> ix86_section_threshold
)
4515 /* Switch to the appropriate section for output of DECL.
4516 DECL is either a `VAR_DECL' node or a constant of some sort.
4517 RELOC indicates whether forming the initial value of DECL requires
4518 link-time relocations. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  For medium-model large data, maps the decl's section category to an
   .ldata/.lbss variant; otherwise defers to the generic ELF selection.  */
4520 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4524 x86_64_elf_select_section (tree decl
, int reloc
,
4525 unsigned HOST_WIDE_INT align
)
4527 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4528 && ix86_in_large_data_p (decl
))
4530 const char *sname
= NULL
;
4531 unsigned int flags
= SECTION_WRITE
;
4532 switch (categorize_decl_for_section (decl
, reloc
))
4537 case SECCAT_DATA_REL
:
4538 sname
= ".ldata.rel";
4540 case SECCAT_DATA_REL_LOCAL
:
4541 sname
= ".ldata.rel.local";
4543 case SECCAT_DATA_REL_RO
:
4544 sname
= ".ldata.rel.ro";
4546 case SECCAT_DATA_REL_RO_LOCAL
:
4547 sname
= ".ldata.rel.ro.local";
4551 flags
|= SECTION_BSS
;
4554 case SECCAT_RODATA_MERGE_STR
:
4555 case SECCAT_RODATA_MERGE_STR_INIT
:
4556 case SECCAT_RODATA_MERGE_CONST
:
4560 case SECCAT_SRODATA
:
4567 /* We don't split these for medium model. Place them into
4568 default sections and hope for best. */
4573 /* We might get called with string constants, but get_named_section
4574 doesn't like them as they are not DECLs. Also, we need to set
4575 flags in that case. */
4577 return get_section (sname
, flags
, NULL
);
4578 return get_named_section (decl
, sname
, reloc
);
4581 return default_elf_select_section (decl
, reloc
, align
);
4584 /* Build up a unique section name, expressed as a
4585 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4586 RELOC indicates whether the initial value of EXP requires
4587 link-time relocations. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Medium-model counterpart of default_unique_section: builds
   [.gnu.linkonce]<.l-prefix>.<decl-name> for large data/bss/rodata.  */
4589 static void ATTRIBUTE_UNUSED
4590 x86_64_elf_unique_section (tree decl
, int reloc
)
4592 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4593 && ix86_in_large_data_p (decl
))
4595 const char *prefix
= NULL
;
4596 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4597 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4599 switch (categorize_decl_for_section (decl
, reloc
))
4602 case SECCAT_DATA_REL
:
4603 case SECCAT_DATA_REL_LOCAL
:
4604 case SECCAT_DATA_REL_RO
:
4605 case SECCAT_DATA_REL_RO_LOCAL
:
4606 prefix
= one_only
? ".ld" : ".ldata";
4609 prefix
= one_only
? ".lb" : ".lbss";
4612 case SECCAT_RODATA_MERGE_STR
:
4613 case SECCAT_RODATA_MERGE_STR_INIT
:
4614 case SECCAT_RODATA_MERGE_CONST
:
4615 prefix
= one_only
? ".lr" : ".lrodata";
4617 case SECCAT_SRODATA
:
4624 /* We don't split these for medium model. Place them into
4625 default sections and hope for best. */
4630 const char *name
, *linkonce
;
4633 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4634 name
= targetm
.strip_name_encoding (name
);
4636 /* If we're using one_only, then there needs to be a .gnu.linkonce
4637 prefix to the section name. */
4638 linkonce
= one_only
? ".gnu.linkonce" : "";
4640 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4642 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
/* Not large data: fall back to the generic unique-section logic.  */
4646 default_unique_section (decl
, reloc
);
4649 #ifdef COMMON_ASM_OP
4650 /* This says how to output assembler code to declare an
4651 uninitialized external linkage data object.
4653 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Emits ".largecomm" instead of COMMON_ASM_OP when the object exceeds
   the large-section threshold under the medium code models.  */
4656 x86_elf_aligned_common (FILE *file
,
4657 const char *name
, unsigned HOST_WIDE_INT size
,
4660 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4661 && size
> (unsigned int)ix86_section_threshold
)
4662 fputs (".largecomm\t", file
);
4664 fputs (COMMON_ASM_OP
, file
);
4665 assemble_name (file
, name
);
/* Alignment is converted from bits to bytes for the assembler directive.  */
4666 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4667 size
, align
/ BITS_PER_UNIT
);
4671 /* Utility function for targets to use in implementing
4672 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Chooses .lbss for medium-model large objects, otherwise the normal
   bss section, then emits alignment, label/decl name, and the skip.  */
4675 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4676 const char *name
, unsigned HOST_WIDE_INT size
,
4679 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4680 && size
> (unsigned int)ix86_section_threshold
)
4681 switch_to_section (get_named_section (decl
, ".lbss", 0));
4683 switch_to_section (bss_section
);
4684 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4685 #ifdef ASM_DECLARE_OBJECT_NAME
4686 last_assemble_variable_decl
= decl
;
4687 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4689 /* Standard thing is just output label for the object. */
4690 ASM_OUTPUT_LABEL (file
, name
);
4691 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so zero-sized objects get distinct addresses.  */
4692 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4695 /* Decide whether we must probe the stack before any space allocation
4696 on this target. It's essentially TARGET_STACK_PROBE except when
4697 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added (the early-return body for the static-check case is missing from this
   view).  */
4700 ix86_target_stack_probe (void)
4702 /* Do not probe the stack twice if static stack checking is enabled. */
4703 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4706 return TARGET_STACK_PROBE
;
4709 /* Decide whether we can make a sibling call to a function. DECL is the
4710 declaration of the function being targeted by the call and EXP is the
4711 CALL_EXPR representing the call. */
/* NOTE(review): lossy extraction — code preserved byte-for-byte, comments only
   added.  Rejects sibcalls for PIC-with-PLT targets, misaligned outgoing
   stack, mismatched return-value locations, MS->SYSV ABI transitions, and
   indirect calls that would leave no call-clobbered register free.  */
4714 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4716 tree type
, decl_or_type
;
4719 /* If we are generating position-independent code, we cannot sibcall
4720 optimize any indirect call, or a direct call to a global function,
4721 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4725 && (!decl
|| !targetm
.binds_local_p (decl
)))
4728 /* If we need to align the outgoing stack, then sibcalling would
4729 unalign the stack, which may break the called function. */
4730 if (ix86_minimum_incoming_stack_boundary (true)
4731 < PREFERRED_STACK_BOUNDARY
)
4736 decl_or_type
= decl
;
4737 type
= TREE_TYPE (decl
);
4741 /* We're looking at the CALL_EXPR, we need the type of the function. */
4742 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4743 type
= TREE_TYPE (type
); /* pointer type */
4744 type
= TREE_TYPE (type
); /* function type */
4745 decl_or_type
= type
;
4748 /* Check that the return value locations are the same. Like
4749 if we are returning floats on the 80387 register stack, we cannot
4750 make a sibcall from a function that doesn't return a float to a
4751 function that does or, conversely, from a function that does return
4752 a float to a function that doesn't; the necessary stack adjustment
4753 would not be executed. This is also the place we notice
4754 differences in the return value ABI. Note that it is ok for one
4755 of the functions to have void return type as long as the return
4756 value of the other is passed in a register. */
4757 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4758 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4760 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4762 if (!rtx_equal_p (a
, b
))
4765 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4767 else if (!rtx_equal_p (a
, b
))
4772 /* The SYSV ABI has more call-clobbered registers;
4773 disallow sibcalls from MS to SYSV. */
4774 if (cfun
->machine
->call_abi
== MS_ABI
4775 && ix86_function_type_abi (type
) == SYSV_ABI
)
4780 /* If this call is indirect, we'll need to be able to use a
4781 call-clobbered register for the address of the target function.
4782 Make sure that all such registers are not used for passing
4783 parameters. Note that DLLIMPORT functions are indirect. */
4785 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4787 if (ix86_function_regparm (type
, NULL
) >= 3)
4789 /* ??? Need to count the actual number of registers to be used,
4790 not the possible number of registers. Fix later. */
4796 /* Otherwise okay. That also includes certain types of indirect calls. */
4800 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4801 and "sseregparm" calling convention attributes;
4802 arguments as in struct attribute_spec.handler. */
4805 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4807 int flags ATTRIBUTE_UNUSED
,
4810 if (TREE_CODE (*node
) != FUNCTION_TYPE
4811 && TREE_CODE (*node
) != METHOD_TYPE
4812 && TREE_CODE (*node
) != FIELD_DECL
4813 && TREE_CODE (*node
) != TYPE_DECL
)
4815 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4817 *no_add_attrs
= true;
4821 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4822 if (is_attribute_p ("regparm", name
))
4826 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4828 error ("fastcall and regparm attributes are not compatible");
4831 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4833 error ("regparam and thiscall attributes are not compatible");
4836 cst
= TREE_VALUE (args
);
4837 if (TREE_CODE (cst
) != INTEGER_CST
)
4839 warning (OPT_Wattributes
,
4840 "%qE attribute requires an integer constant argument",
4842 *no_add_attrs
= true;
4844 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4846 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4848 *no_add_attrs
= true;
4856 /* Do not warn when emulating the MS ABI. */
4857 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4858 && TREE_CODE (*node
) != METHOD_TYPE
)
4859 || ix86_function_type_abi (*node
) != MS_ABI
)
4860 warning (OPT_Wattributes
, "%qE attribute ignored",
4862 *no_add_attrs
= true;
4866 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4867 if (is_attribute_p ("fastcall", name
))
4869 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4871 error ("fastcall and cdecl attributes are not compatible");
4873 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4875 error ("fastcall and stdcall attributes are not compatible");
4877 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4879 error ("fastcall and regparm attributes are not compatible");
4881 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4883 error ("fastcall and thiscall attributes are not compatible");
4887 /* Can combine stdcall with fastcall (redundant), regparm and
4889 else if (is_attribute_p ("stdcall", name
))
4891 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4893 error ("stdcall and cdecl attributes are not compatible");
4895 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4897 error ("stdcall and fastcall attributes are not compatible");
4899 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4901 error ("stdcall and thiscall attributes are not compatible");
4905 /* Can combine cdecl with regparm and sseregparm. */
4906 else if (is_attribute_p ("cdecl", name
))
4908 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4910 error ("stdcall and cdecl attributes are not compatible");
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4914 error ("fastcall and cdecl attributes are not compatible");
4916 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4918 error ("cdecl and thiscall attributes are not compatible");
4921 else if (is_attribute_p ("thiscall", name
))
4923 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
4924 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
4926 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4928 error ("stdcall and thiscall attributes are not compatible");
4930 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4932 error ("fastcall and thiscall attributes are not compatible");
4934 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4936 error ("cdecl and thiscall attributes are not compatible");
4940 /* Can combine sseregparm with all attributes. */
4945 /* The transactional memory builtins are implicitly regparm or fastcall
4946 depending on the ABI. Override the generic do-nothing attribute that
4947 these builtins were declared with, and replace it with one of the two
4948 attributes that we expect elsewhere. */
4951 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
4952 tree args ATTRIBUTE_UNUSED
,
4953 int flags ATTRIBUTE_UNUSED
,
4958 /* In no case do we want to add the placeholder attribute. */
4959 *no_add_attrs
= true;
4961 /* The 64-bit ABI is unchanged for transactional memory. */
4965 /* ??? Is there a better way to validate 32-bit windows? We have
4966 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
4967 if (CHECK_STACK_LIMIT
> 0)
4968 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
4971 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
4972 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
4974 decl_attributes (node
, alt
, flags
);
4979 /* This function determines from TYPE the calling-convention. */
4982 ix86_get_callcvt (const_tree type
)
4984 unsigned int ret
= 0;
4989 return IX86_CALLCVT_CDECL
;
4991 attrs
= TYPE_ATTRIBUTES (type
);
4992 if (attrs
!= NULL_TREE
)
4994 if (lookup_attribute ("cdecl", attrs
))
4995 ret
|= IX86_CALLCVT_CDECL
;
4996 else if (lookup_attribute ("stdcall", attrs
))
4997 ret
|= IX86_CALLCVT_STDCALL
;
4998 else if (lookup_attribute ("fastcall", attrs
))
4999 ret
|= IX86_CALLCVT_FASTCALL
;
5000 else if (lookup_attribute ("thiscall", attrs
))
5001 ret
|= IX86_CALLCVT_THISCALL
;
5003 /* Regparam isn't allowed for thiscall and fastcall. */
5004 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5006 if (lookup_attribute ("regparm", attrs
))
5007 ret
|= IX86_CALLCVT_REGPARM
;
5008 if (lookup_attribute ("sseregparm", attrs
))
5009 ret
|= IX86_CALLCVT_SSEREGPARM
;
5012 if (IX86_BASE_CALLCVT(ret
) != 0)
5016 is_stdarg
= stdarg_p (type
);
5017 if (TARGET_RTD
&& !is_stdarg
)
5018 return IX86_CALLCVT_STDCALL
| ret
;
5022 || TREE_CODE (type
) != METHOD_TYPE
5023 || ix86_function_type_abi (type
) != MS_ABI
)
5024 return IX86_CALLCVT_CDECL
| ret
;
5026 return IX86_CALLCVT_THISCALL
;
5029 /* Return 0 if the attributes for two types are incompatible, 1 if they
5030 are compatible, and 2 if they are nearly compatible (which causes a
5031 warning to be generated). */
5034 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5036 unsigned int ccvt1
, ccvt2
;
5038 if (TREE_CODE (type1
) != FUNCTION_TYPE
5039 && TREE_CODE (type1
) != METHOD_TYPE
)
5042 ccvt1
= ix86_get_callcvt (type1
);
5043 ccvt2
= ix86_get_callcvt (type2
);
5046 if (ix86_function_regparm (type1
, NULL
)
5047 != ix86_function_regparm (type2
, NULL
))
5053 /* Return the regparm value for a function with the indicated TYPE and DECL.
5054 DECL may be NULL when calling function indirectly
5055 or considering a libcall. */
5058 ix86_function_regparm (const_tree type
, const_tree decl
)
5065 return (ix86_function_type_abi (type
) == SYSV_ABI
5066 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5067 ccvt
= ix86_get_callcvt (type
);
5068 regparm
= ix86_regparm
;
5070 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5072 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5075 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5079 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5081 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5084 /* Use register calling convention for local functions when possible. */
5086 && TREE_CODE (decl
) == FUNCTION_DECL
5088 && !(profile_flag
&& !flag_fentry
))
5090 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5091 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5092 if (i
&& i
->local
&& i
->can_change_signature
)
5094 int local_regparm
, globals
= 0, regno
;
5096 /* Make sure no regparm register is taken by a
5097 fixed register variable. */
5098 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5099 if (fixed_regs
[local_regparm
])
5102 /* We don't want to use regparm(3) for nested functions as
5103 these use a static chain pointer in the third argument. */
5104 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5107 /* In 32-bit mode save a register for the split stack. */
5108 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5111 /* Each fixed register usage increases register pressure,
5112 so less registers should be used for argument passing.
5113 This functionality can be overriden by an explicit
5115 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5116 if (fixed_regs
[regno
])
5120 = globals
< local_regparm
? local_regparm
- globals
: 0;
5122 if (local_regparm
> regparm
)
5123 regparm
= local_regparm
;
5130 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5131 DFmode (2) arguments in SSE registers for a function with the
5132 indicated TYPE and DECL. DECL may be NULL when calling function
5133 indirectly or considering a libcall. Otherwise return 0. */
5136 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5138 gcc_assert (!TARGET_64BIT
);
5140 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5141 by the sseregparm attribute. */
5142 if (TARGET_SSEREGPARM
5143 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5150 error ("calling %qD with attribute sseregparm without "
5151 "SSE/SSE2 enabled", decl
);
5153 error ("calling %qT with attribute sseregparm without "
5154 "SSE/SSE2 enabled", type
);
5162 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5163 (and DFmode for SSE2) arguments in SSE registers. */
5164 if (decl
&& TARGET_SSE_MATH
&& optimize
5165 && !(profile_flag
&& !flag_fentry
))
5167 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5168 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5169 if (i
&& i
->local
&& i
->can_change_signature
)
5170 return TARGET_SSE2
? 2 : 1;
5176 /* Return true if EAX is live at the start of the function. Used by
5177 ix86_expand_prologue to determine if we need special help before
5178 calling allocate_stack_worker. */
5181 ix86_eax_live_at_start_p (void)
5183 /* Cheat. Don't bother working forward from ix86_function_regparm
5184 to the function type to whether an actual argument is located in
5185 eax. Instead just look at cfg info, which is still close enough
5186 to correct at this point. This gives false positives for broken
5187 functions that might use uninitialized data that happens to be
5188 allocated in eax, but who cares? */
5189 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5193 ix86_keep_aggregate_return_pointer (tree fntype
)
5199 attr
= lookup_attribute ("callee_pop_aggregate_return",
5200 TYPE_ATTRIBUTES (fntype
));
5202 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5204 /* For 32-bit MS-ABI the default is to keep aggregate
5206 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5209 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5212 /* Value is the number of bytes of arguments automatically
5213 popped when returning from a subroutine call.
5214 FUNDECL is the declaration node of the function (as a tree),
5215 FUNTYPE is the data type of the function (as a tree),
5216 or for a library call it is an identifier node for the subroutine name.
5217 SIZE is the number of bytes of arguments passed on the stack.
5219 On the 80386, the RTD insn may be used to pop them if the number
5220 of args is fixed, but if the number is variable then the caller
5221 must pop them all. RTD can't be used for library calls now
5222 because the library is compiled with the Unix compiler.
5223 Use of RTD is a selectable option, since it is incompatible with
5224 standard Unix calling sequences. If the option is not selected,
5225 the caller must always pop the args.
5227 The attribute stdcall is equivalent to RTD on a per module basis. */
5230 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5234 /* None of the 64-bit ABIs pop arguments. */
5238 ccvt
= ix86_get_callcvt (funtype
);
5240 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5241 | IX86_CALLCVT_THISCALL
)) != 0
5242 && ! stdarg_p (funtype
))
5245 /* Lose any fake structure return argument if it is passed on the stack. */
5246 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5247 && !ix86_keep_aggregate_return_pointer (funtype
))
5249 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5251 return GET_MODE_SIZE (Pmode
);
5257 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5260 ix86_legitimate_combined_insn (rtx insn
)
5262 /* Check operand constraints in case hard registers were propagated
5263 into insn pattern. This check prevents combine pass from
5264 generating insn patterns with invalid hard register operands.
5265 These invalid insns can eventually confuse reload to error out
5266 with a spill failure. See also PRs 46829 and 46843. */
5267 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5271 extract_insn (insn
);
5272 preprocess_constraints ();
5274 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5276 rtx op
= recog_data
.operand
[i
];
5277 enum machine_mode mode
= GET_MODE (op
);
5278 struct operand_alternative
*op_alt
;
5283 /* A unary operator may be accepted by the predicate, but it
5284 is irrelevant for matching constraints. */
5288 if (GET_CODE (op
) == SUBREG
)
5290 if (REG_P (SUBREG_REG (op
))
5291 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5292 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5293 GET_MODE (SUBREG_REG (op
)),
5296 op
= SUBREG_REG (op
);
5299 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5302 op_alt
= recog_op_alt
[i
];
5304 /* Operand has no constraints, anything is OK. */
5305 win
= !recog_data
.n_alternatives
;
5307 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5309 if (op_alt
[j
].anything_ok
5310 || (op_alt
[j
].matches
!= -1
5312 (recog_data
.operand
[i
],
5313 recog_data
.operand
[op_alt
[j
].matches
]))
5314 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5329 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5331 static unsigned HOST_WIDE_INT
5332 ix86_asan_shadow_offset (void)
5334 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_LP64
? 44 : 29);
5337 /* Argument support functions. */
5339 /* Return true when register may be used to pass function parameters. */
5341 ix86_function_arg_regno_p (int regno
)
5344 const int *parm_regs
;
5349 return (regno
< REGPARM_MAX
5350 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5352 return (regno
< REGPARM_MAX
5353 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5354 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5355 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5356 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5361 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5366 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5367 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5371 /* TODO: The function should depend on current function ABI but
5372 builtins.c would need updating then. Therefore we use the
5375 /* RAX is used as hidden argument to va_arg functions. */
5376 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5379 if (ix86_abi
== MS_ABI
)
5380 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5382 parm_regs
= x86_64_int_parameter_registers
;
5383 for (i
= 0; i
< (ix86_abi
== MS_ABI
5384 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5385 if (regno
== parm_regs
[i
])
5390 /* Return if we do not know how to pass TYPE solely in registers. */
5393 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5395 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5398 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5399 The layout_type routine is crafty and tries to trick us into passing
5400 currently unsupported vector types on the stack by using TImode. */
5401 return (!TARGET_64BIT
&& mode
== TImode
5402 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5405 /* It returns the size, in bytes, of the area reserved for arguments passed
5406 in registers for the function represented by fndecl dependent to the used
5409 ix86_reg_parm_stack_space (const_tree fndecl
)
5411 enum calling_abi call_abi
= SYSV_ABI
;
5412 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5413 call_abi
= ix86_function_abi (fndecl
);
5415 call_abi
= ix86_function_type_abi (fndecl
);
5416 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5421 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5424 ix86_function_type_abi (const_tree fntype
)
5426 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5428 enum calling_abi abi
= ix86_abi
;
5429 if (abi
== SYSV_ABI
)
5431 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5434 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5442 ix86_function_ms_hook_prologue (const_tree fn
)
5444 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5446 if (decl_function_context (fn
) != NULL_TREE
)
5447 error_at (DECL_SOURCE_LOCATION (fn
),
5448 "ms_hook_prologue is not compatible with nested function");
5455 static enum calling_abi
5456 ix86_function_abi (const_tree fndecl
)
5460 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5463 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5466 ix86_cfun_abi (void)
5470 return cfun
->machine
->call_abi
;
5473 /* Write the extra assembler code needed to declare a function properly. */
5476 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5479 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5483 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5484 unsigned int filler_cc
= 0xcccccccc;
5486 for (i
= 0; i
< filler_count
; i
+= 4)
5487 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5490 #ifdef SUBTARGET_ASM_UNWIND_INIT
5491 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5494 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5496 /* Output magic byte marker, if hot-patch attribute is set. */
5501 /* leaq [%rsp + 0], %rsp */
5502 asm_fprintf (asm_out_file
, ASM_BYTE
5503 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5507 /* movl.s %edi, %edi
5509 movl.s %esp, %ebp */
5510 asm_fprintf (asm_out_file
, ASM_BYTE
5511 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5517 extern void init_regs (void);
5519 /* Implementation of call abi switching target hook. Specific to FNDECL
5520 the specific call register sets are set. See also
5521 ix86_conditional_register_usage for more details. */
5523 ix86_call_abi_override (const_tree fndecl
)
5525 if (fndecl
== NULL_TREE
)
5526 cfun
->machine
->call_abi
= ix86_abi
;
5528 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5531 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5532 expensive re-initialization of init_regs each time we switch function context
5533 since this is needed only during RTL expansion. */
5535 ix86_maybe_switch_abi (void)
5538 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5542 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5543 for a call to a function whose data type is FNTYPE.
5544 For a library call, FNTYPE is 0. */
5547 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5548 tree fntype
, /* tree ptr for function decl */
5549 rtx libname
, /* SYMBOL_REF of library name or 0 */
5553 struct cgraph_local_info
*i
;
5555 memset (cum
, 0, sizeof (*cum
));
5559 i
= cgraph_local_info (fndecl
);
5560 cum
->call_abi
= ix86_function_abi (fndecl
);
5565 cum
->call_abi
= ix86_function_type_abi (fntype
);
5568 cum
->caller
= caller
;
5570 /* Set up the number of registers to use for passing arguments. */
5572 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5573 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5574 "or subtarget optimization implying it");
5575 cum
->nregs
= ix86_regparm
;
5578 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5579 ? X86_64_REGPARM_MAX
5580 : X86_64_MS_REGPARM_MAX
);
5584 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5587 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5588 ? X86_64_SSE_REGPARM_MAX
5589 : X86_64_MS_SSE_REGPARM_MAX
);
5593 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5594 cum
->warn_avx
= true;
5595 cum
->warn_sse
= true;
5596 cum
->warn_mmx
= true;
5598 /* Because type might mismatch in between caller and callee, we need to
5599 use actual type of function for local calls.
5600 FIXME: cgraph_analyze can be told to actually record if function uses
5601 va_start so for local functions maybe_vaarg can be made aggressive
5603 FIXME: once typesytem is fixed, we won't need this code anymore. */
5604 if (i
&& i
->local
&& i
->can_change_signature
)
5605 fntype
= TREE_TYPE (fndecl
);
5606 cum
->maybe_vaarg
= (fntype
5607 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5612 /* If there are variable arguments, then we won't pass anything
5613 in registers in 32-bit mode. */
5614 if (stdarg_p (fntype
))
5625 /* Use ecx and edx registers if function has fastcall attribute,
5626 else look for regparm information. */
5629 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5630 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5633 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5635 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5641 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5644 /* Set up the number of SSE registers used for passing SFmode
5645 and DFmode arguments. Warn for mismatching ABI. */
5646 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5650 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5651 But in the case of vector types, it is some vector mode.
5653 When we have only some of our vector isa extensions enabled, then there
5654 are some modes for which vector_mode_supported_p is false. For these
5655 modes, the generic vector support in gcc will choose some non-vector mode
5656 in order to implement the type. By computing the natural mode, we'll
5657 select the proper ABI location for the operand and not depend on whatever
5658 the middle-end decides to do with these vector types.
5660 The midde-end can't deal with the vector types > 16 bytes. In this
5661 case, we return the original mode and warn ABI change if CUM isn't
5664 static enum machine_mode
5665 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5667 enum machine_mode mode
= TYPE_MODE (type
);
5669 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5671 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5672 if ((size
== 8 || size
== 16 || size
== 32)
5673 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5674 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5676 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5678 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5679 mode
= MIN_MODE_VECTOR_FLOAT
;
5681 mode
= MIN_MODE_VECTOR_INT
;
5683 /* Get the mode which has this inner mode and number of units. */
5684 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5685 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5686 && GET_MODE_INNER (mode
) == innermode
)
5688 if (size
== 32 && !TARGET_AVX
)
5690 static bool warnedavx
;
5697 warning (0, "AVX vector argument without AVX "
5698 "enabled changes the ABI");
5700 return TYPE_MODE (type
);
5702 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5704 static bool warnedsse
;
5711 warning (0, "SSE vector argument without SSE "
5712 "enabled changes the ABI");
5727 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5728 this may not agree with the mode that the type system has chosen for the
5729 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5730 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5733 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5738 if (orig_mode
!= BLKmode
)
5739 tmp
= gen_rtx_REG (orig_mode
, regno
);
5742 tmp
= gen_rtx_REG (mode
, regno
);
5743 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5744 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5750 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5751 of this code is to classify each 8bytes of incoming argument by the register
5752 class and assign registers accordingly. */
5754 /* Return the union class of CLASS1 and CLASS2.
5755 See the x86-64 PS ABI for details. */
5757 static enum x86_64_reg_class
5758 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5760 /* Rule #1: If both classes are equal, this is the resulting class. */
5761 if (class1
== class2
)
5764 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5766 if (class1
== X86_64_NO_CLASS
)
5768 if (class2
== X86_64_NO_CLASS
)
5771 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5772 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5773 return X86_64_MEMORY_CLASS
;
5775 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5776 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5777 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5778 return X86_64_INTEGERSI_CLASS
;
5779 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5780 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5781 return X86_64_INTEGER_CLASS
;
5783 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5785 if (class1
== X86_64_X87_CLASS
5786 || class1
== X86_64_X87UP_CLASS
5787 || class1
== X86_64_COMPLEX_X87_CLASS
5788 || class2
== X86_64_X87_CLASS
5789 || class2
== X86_64_X87UP_CLASS
5790 || class2
== X86_64_COMPLEX_X87_CLASS
)
5791 return X86_64_MEMORY_CLASS
;
5793 /* Rule #6: Otherwise class SSE is used. */
5794 return X86_64_SSE_CLASS
;
5797 /* Classify the argument of type TYPE and mode MODE.
5798 CLASSES will be filled by the register class used to pass each word
5799 of the operand. The number of words is returned. In case the parameter
5800 should be passed in memory, 0 is returned. As a special case for zero
5801 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5803 BIT_OFFSET is used internally for handling records and specifies offset
5804 of the offset in bits modulo 256 to avoid overflow cases.
5806 See the x86-64 PS ABI for details.
5810 classify_argument (enum machine_mode mode
, const_tree type
,
5811 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5813 HOST_WIDE_INT bytes
=
5814 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5816 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5818 /* Variable sized entities are always passed/returned in memory. */
5822 if (mode
!= VOIDmode
5823 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5826 if (type
&& AGGREGATE_TYPE_P (type
))
5830 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5832 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5836 for (i
= 0; i
< words
; i
++)
5837 classes
[i
] = X86_64_NO_CLASS
;
5839 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5840 signalize memory class, so handle it as special case. */
5843 classes
[0] = X86_64_NO_CLASS
;
5847 /* Classify each field of record and merge classes. */
5848 switch (TREE_CODE (type
))
5851 /* And now merge the fields of structure. */
5852 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5854 if (TREE_CODE (field
) == FIELD_DECL
)
5858 if (TREE_TYPE (field
) == error_mark_node
)
5861 /* Bitfields are always classified as integer. Handle them
5862 early, since later code would consider them to be
5863 misaligned integers. */
5864 if (DECL_BIT_FIELD (field
))
5866 for (i
= (int_bit_position (field
)
5867 + (bit_offset
% 64)) / 8 / 8;
5868 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5869 + tree_low_cst (DECL_SIZE (field
), 0)
5872 merge_classes (X86_64_INTEGER_CLASS
,
5879 type
= TREE_TYPE (field
);
5881 /* Flexible array member is ignored. */
5882 if (TYPE_MODE (type
) == BLKmode
5883 && TREE_CODE (type
) == ARRAY_TYPE
5884 && TYPE_SIZE (type
) == NULL_TREE
5885 && TYPE_DOMAIN (type
) != NULL_TREE
5886 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5891 if (!warned
&& warn_psabi
)
5894 inform (input_location
,
5895 "the ABI of passing struct with"
5896 " a flexible array member has"
5897 " changed in GCC 4.4");
5901 num
= classify_argument (TYPE_MODE (type
), type
,
5903 (int_bit_position (field
)
5904 + bit_offset
) % 256);
5907 pos
= (int_bit_position (field
)
5908 + (bit_offset
% 64)) / 8 / 8;
5909 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5911 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
5918 /* Arrays are handled as small records. */
5921 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
5922 TREE_TYPE (type
), subclasses
, bit_offset
);
5926 /* The partial classes are now full classes. */
5927 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
5928 subclasses
[0] = X86_64_SSE_CLASS
;
5929 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
5930 && !((bit_offset
% 64) == 0 && bytes
== 4))
5931 subclasses
[0] = X86_64_INTEGER_CLASS
;
5933 for (i
= 0; i
< words
; i
++)
5934 classes
[i
] = subclasses
[i
% num
];
5939 case QUAL_UNION_TYPE
:
5940 /* Unions are similar to RECORD_TYPE but offset is always 0.
5942 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5944 if (TREE_CODE (field
) == FIELD_DECL
)
5948 if (TREE_TYPE (field
) == error_mark_node
)
5951 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
5952 TREE_TYPE (field
), subclasses
,
5956 for (i
= 0; i
< num
; i
++)
5957 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
5968 /* When size > 16 bytes, if the first one isn't
5969 X86_64_SSE_CLASS or any other ones aren't
5970 X86_64_SSEUP_CLASS, everything should be passed in
5972 if (classes
[0] != X86_64_SSE_CLASS
)
5975 for (i
= 1; i
< words
; i
++)
5976 if (classes
[i
] != X86_64_SSEUP_CLASS
)
5980 /* Final merger cleanup. */
5981 for (i
= 0; i
< words
; i
++)
5983 /* If one class is MEMORY, everything should be passed in
5985 if (classes
[i
] == X86_64_MEMORY_CLASS
)
5988 /* The X86_64_SSEUP_CLASS should be always preceded by
5989 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5990 if (classes
[i
] == X86_64_SSEUP_CLASS
5991 && classes
[i
- 1] != X86_64_SSE_CLASS
5992 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
5994 /* The first one should never be X86_64_SSEUP_CLASS. */
5995 gcc_assert (i
!= 0);
5996 classes
[i
] = X86_64_SSE_CLASS
;
5999 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6000 everything should be passed in memory. */
6001 if (classes
[i
] == X86_64_X87UP_CLASS
6002 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6006 /* The first one should never be X86_64_X87UP_CLASS. */
6007 gcc_assert (i
!= 0);
6008 if (!warned
&& warn_psabi
)
6011 inform (input_location
,
6012 "the ABI of passing union with long double"
6013 " has changed in GCC 4.4");
6021 /* Compute alignment needed. We align all types to natural boundaries with
6022 exception of XFmode that is aligned to 64bits. */
6023 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6025 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6028 mode_alignment
= 128;
6029 else if (mode
== XCmode
)
6030 mode_alignment
= 256;
6031 if (COMPLEX_MODE_P (mode
))
6032 mode_alignment
/= 2;
6033 /* Misaligned fields are always returned in memory. */
6034 if (bit_offset
% mode_alignment
)
6038 /* for V1xx modes, just use the base mode */
6039 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6040 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6041 mode
= GET_MODE_INNER (mode
);
6043 /* Classification of atomic types. */
6048 classes
[0] = X86_64_SSE_CLASS
;
6051 classes
[0] = X86_64_SSE_CLASS
;
6052 classes
[1] = X86_64_SSEUP_CLASS
;
6062 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6066 classes
[0] = X86_64_INTEGERSI_CLASS
;
6069 else if (size
<= 64)
6071 classes
[0] = X86_64_INTEGER_CLASS
;
6074 else if (size
<= 64+32)
6076 classes
[0] = X86_64_INTEGER_CLASS
;
6077 classes
[1] = X86_64_INTEGERSI_CLASS
;
6080 else if (size
<= 64+64)
6082 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6090 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6094 /* OImode shouldn't be used directly. */
6099 if (!(bit_offset
% 64))
6100 classes
[0] = X86_64_SSESF_CLASS
;
6102 classes
[0] = X86_64_SSE_CLASS
;
6105 classes
[0] = X86_64_SSEDF_CLASS
;
6108 classes
[0] = X86_64_X87_CLASS
;
6109 classes
[1] = X86_64_X87UP_CLASS
;
6112 classes
[0] = X86_64_SSE_CLASS
;
6113 classes
[1] = X86_64_SSEUP_CLASS
;
6116 classes
[0] = X86_64_SSE_CLASS
;
6117 if (!(bit_offset
% 64))
6123 if (!warned
&& warn_psabi
)
6126 inform (input_location
,
6127 "the ABI of passing structure with complex float"
6128 " member has changed in GCC 4.4");
6130 classes
[1] = X86_64_SSESF_CLASS
;
6134 classes
[0] = X86_64_SSEDF_CLASS
;
6135 classes
[1] = X86_64_SSEDF_CLASS
;
6138 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6141 /* This modes is larger than 16 bytes. */
6149 classes
[0] = X86_64_SSE_CLASS
;
6150 classes
[1] = X86_64_SSEUP_CLASS
;
6151 classes
[2] = X86_64_SSEUP_CLASS
;
6152 classes
[3] = X86_64_SSEUP_CLASS
;
6160 classes
[0] = X86_64_SSE_CLASS
;
6161 classes
[1] = X86_64_SSEUP_CLASS
;
6169 classes
[0] = X86_64_SSE_CLASS
;
6175 gcc_assert (VECTOR_MODE_P (mode
));
6180 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6182 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6183 classes
[0] = X86_64_INTEGERSI_CLASS
;
6185 classes
[0] = X86_64_INTEGER_CLASS
;
6186 classes
[1] = X86_64_INTEGER_CLASS
;
6187 return 1 + (bytes
> 8);
6191 /* Examine the argument and return set number of register required in each
6192 class. Return 0 iff parameter should be passed in memory. */
6194 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6195 int *int_nregs
, int *sse_nregs
)
6197 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6198 int n
= classify_argument (mode
, type
, regclass
, 0);
6204 for (n
--; n
>= 0; n
--)
6205 switch (regclass
[n
])
6207 case X86_64_INTEGER_CLASS
:
6208 case X86_64_INTEGERSI_CLASS
:
6211 case X86_64_SSE_CLASS
:
6212 case X86_64_SSESF_CLASS
:
6213 case X86_64_SSEDF_CLASS
:
6216 case X86_64_NO_CLASS
:
6217 case X86_64_SSEUP_CLASS
:
6219 case X86_64_X87_CLASS
:
6220 case X86_64_X87UP_CLASS
:
6224 case X86_64_COMPLEX_X87_CLASS
:
6225 return in_return
? 2 : 0;
6226 case X86_64_MEMORY_CLASS
:
6232 /* Construct container for the argument used by GCC interface. See
6233 FUNCTION_ARG for the detailed description. */
6236 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6237 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6238 const int *intreg
, int sse_regno
)
6240 /* The following variables hold the static issued_error state. */
6241 static bool issued_sse_arg_error
;
6242 static bool issued_sse_ret_error
;
6243 static bool issued_x87_ret_error
;
6245 enum machine_mode tmpmode
;
6247 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6248 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6252 int needed_sseregs
, needed_intregs
;
6253 rtx exp
[MAX_CLASSES
];
6256 n
= classify_argument (mode
, type
, regclass
, 0);
6259 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6262 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6265 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6266 some less clueful developer tries to use floating-point anyway. */
6267 if (needed_sseregs
&& !TARGET_SSE
)
6271 if (!issued_sse_ret_error
)
6273 error ("SSE register return with SSE disabled");
6274 issued_sse_ret_error
= true;
6277 else if (!issued_sse_arg_error
)
6279 error ("SSE register argument with SSE disabled");
6280 issued_sse_arg_error
= true;
6285 /* Likewise, error if the ABI requires us to return values in the
6286 x87 registers and the user specified -mno-80387. */
6287 if (!TARGET_80387
&& in_return
)
6288 for (i
= 0; i
< n
; i
++)
6289 if (regclass
[i
] == X86_64_X87_CLASS
6290 || regclass
[i
] == X86_64_X87UP_CLASS
6291 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6293 if (!issued_x87_ret_error
)
6295 error ("x87 register return with x87 disabled");
6296 issued_x87_ret_error
= true;
6301 /* First construct simple cases. Avoid SCmode, since we want to use
6302 single register to pass this type. */
6303 if (n
== 1 && mode
!= SCmode
)
6304 switch (regclass
[0])
6306 case X86_64_INTEGER_CLASS
:
6307 case X86_64_INTEGERSI_CLASS
:
6308 return gen_rtx_REG (mode
, intreg
[0]);
6309 case X86_64_SSE_CLASS
:
6310 case X86_64_SSESF_CLASS
:
6311 case X86_64_SSEDF_CLASS
:
6312 if (mode
!= BLKmode
)
6313 return gen_reg_or_parallel (mode
, orig_mode
,
6314 SSE_REGNO (sse_regno
));
6316 case X86_64_X87_CLASS
:
6317 case X86_64_COMPLEX_X87_CLASS
:
6318 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6319 case X86_64_NO_CLASS
:
6320 /* Zero sized array, struct or class. */
6326 && regclass
[0] == X86_64_SSE_CLASS
6327 && regclass
[1] == X86_64_SSEUP_CLASS
6329 return gen_reg_or_parallel (mode
, orig_mode
,
6330 SSE_REGNO (sse_regno
));
6332 && regclass
[0] == X86_64_SSE_CLASS
6333 && regclass
[1] == X86_64_SSEUP_CLASS
6334 && regclass
[2] == X86_64_SSEUP_CLASS
6335 && regclass
[3] == X86_64_SSEUP_CLASS
6337 return gen_reg_or_parallel (mode
, orig_mode
,
6338 SSE_REGNO (sse_regno
));
6340 && regclass
[0] == X86_64_X87_CLASS
6341 && regclass
[1] == X86_64_X87UP_CLASS
)
6342 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6345 && regclass
[0] == X86_64_INTEGER_CLASS
6346 && regclass
[1] == X86_64_INTEGER_CLASS
6347 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6348 && intreg
[0] + 1 == intreg
[1])
6349 return gen_rtx_REG (mode
, intreg
[0]);
6351 /* Otherwise figure out the entries of the PARALLEL. */
6352 for (i
= 0; i
< n
; i
++)
6356 switch (regclass
[i
])
6358 case X86_64_NO_CLASS
:
6360 case X86_64_INTEGER_CLASS
:
6361 case X86_64_INTEGERSI_CLASS
:
6362 /* Merge TImodes on aligned occasions here too. */
6363 if (i
* 8 + 8 > bytes
)
6365 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6366 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6370 /* We've requested 24 bytes we
6371 don't have mode for. Use DImode. */
6372 if (tmpmode
== BLKmode
)
6375 = gen_rtx_EXPR_LIST (VOIDmode
,
6376 gen_rtx_REG (tmpmode
, *intreg
),
6380 case X86_64_SSESF_CLASS
:
6382 = gen_rtx_EXPR_LIST (VOIDmode
,
6383 gen_rtx_REG (SFmode
,
6384 SSE_REGNO (sse_regno
)),
6388 case X86_64_SSEDF_CLASS
:
6390 = gen_rtx_EXPR_LIST (VOIDmode
,
6391 gen_rtx_REG (DFmode
,
6392 SSE_REGNO (sse_regno
)),
6396 case X86_64_SSE_CLASS
:
6404 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6414 && regclass
[1] == X86_64_SSEUP_CLASS
6415 && regclass
[2] == X86_64_SSEUP_CLASS
6416 && regclass
[3] == X86_64_SSEUP_CLASS
);
6424 = gen_rtx_EXPR_LIST (VOIDmode
,
6425 gen_rtx_REG (tmpmode
,
6426 SSE_REGNO (sse_regno
)),
6435 /* Empty aligned struct, union or class. */
6439 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6440 for (i
= 0; i
< nexps
; i
++)
6441 XVECEXP (ret
, 0, i
) = exp
[i
];
6445 /* Update the data in CUM to advance over an argument of mode MODE
6446 and data type TYPE. (TYPE is null for libcalls where that information
6447 may not be available.) */
6450 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6451 const_tree type
, HOST_WIDE_INT bytes
,
6452 HOST_WIDE_INT words
)
6468 cum
->words
+= words
;
6469 cum
->nregs
-= words
;
6470 cum
->regno
+= words
;
6472 if (cum
->nregs
<= 0)
6480 /* OImode shouldn't be used directly. */
6484 if (cum
->float_in_sse
< 2)
6487 if (cum
->float_in_sse
< 1)
6504 if (!type
|| !AGGREGATE_TYPE_P (type
))
6506 cum
->sse_words
+= words
;
6507 cum
->sse_nregs
-= 1;
6508 cum
->sse_regno
+= 1;
6509 if (cum
->sse_nregs
<= 0)
6523 if (!type
|| !AGGREGATE_TYPE_P (type
))
6525 cum
->mmx_words
+= words
;
6526 cum
->mmx_nregs
-= 1;
6527 cum
->mmx_regno
+= 1;
6528 if (cum
->mmx_nregs
<= 0)
6539 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6540 const_tree type
, HOST_WIDE_INT words
, bool named
)
6542 int int_nregs
, sse_nregs
;
6544 /* Unnamed 256bit vector mode parameters are passed on stack. */
6545 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6548 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6549 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6551 cum
->nregs
-= int_nregs
;
6552 cum
->sse_nregs
-= sse_nregs
;
6553 cum
->regno
+= int_nregs
;
6554 cum
->sse_regno
+= sse_nregs
;
6558 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6559 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6560 cum
->words
+= words
;
6565 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6566 HOST_WIDE_INT words
)
6568 /* Otherwise, this should be passed indirect. */
6569 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6571 cum
->words
+= words
;
6579 /* Update the data in CUM to advance over an argument of mode MODE and
6580 data type TYPE. (TYPE is null for libcalls where that information
6581 may not be available.) */
6584 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6585 const_tree type
, bool named
)
6587 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6588 HOST_WIDE_INT bytes
, words
;
6590 if (mode
== BLKmode
)
6591 bytes
= int_size_in_bytes (type
);
6593 bytes
= GET_MODE_SIZE (mode
);
6594 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6597 mode
= type_natural_mode (type
, NULL
);
6599 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6600 function_arg_advance_ms_64 (cum
, bytes
, words
);
6601 else if (TARGET_64BIT
)
6602 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6604 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6607 /* Define where to put the arguments to a function.
6608 Value is zero to push the argument on the stack,
6609 or a hard register in which to store the argument.
6611 MODE is the argument's machine mode.
6612 TYPE is the data type of the argument (as a tree).
6613 This is null for libcalls where that information may
6615 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6616 the preceding args and about the function being called.
6617 NAMED is nonzero if this argument is a named parameter
6618 (otherwise it is an extra parameter matching an ellipsis). */
6621 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6622 enum machine_mode orig_mode
, const_tree type
,
6623 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6625 static bool warnedsse
, warnedmmx
;
6627 /* Avoid the AL settings for the Unix64 ABI. */
6628 if (mode
== VOIDmode
)
6644 if (words
<= cum
->nregs
)
6646 int regno
= cum
->regno
;
6648 /* Fastcall allocates the first two DWORD (SImode) or
6649 smaller arguments to ECX and EDX if it isn't an
6655 || (type
&& AGGREGATE_TYPE_P (type
)))
6658 /* ECX not EAX is the first allocated register. */
6659 if (regno
== AX_REG
)
6662 return gen_rtx_REG (mode
, regno
);
6667 if (cum
->float_in_sse
< 2)
6670 if (cum
->float_in_sse
< 1)
6674 /* In 32bit, we pass TImode in xmm registers. */
6681 if (!type
|| !AGGREGATE_TYPE_P (type
))
6683 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6686 warning (0, "SSE vector argument without SSE enabled "
6690 return gen_reg_or_parallel (mode
, orig_mode
,
6691 cum
->sse_regno
+ FIRST_SSE_REG
);
6696 /* OImode shouldn't be used directly. */
6705 if (!type
|| !AGGREGATE_TYPE_P (type
))
6708 return gen_reg_or_parallel (mode
, orig_mode
,
6709 cum
->sse_regno
+ FIRST_SSE_REG
);
6719 if (!type
|| !AGGREGATE_TYPE_P (type
))
6721 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6724 warning (0, "MMX vector argument without MMX enabled "
6728 return gen_reg_or_parallel (mode
, orig_mode
,
6729 cum
->mmx_regno
+ FIRST_MMX_REG
);
6738 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6739 enum machine_mode orig_mode
, const_tree type
, bool named
)
6741 /* Handle a hidden AL argument containing number of registers
6742 for varargs x86-64 functions. */
6743 if (mode
== VOIDmode
)
6744 return GEN_INT (cum
->maybe_vaarg
6745 ? (cum
->sse_nregs
< 0
6746 ? X86_64_SSE_REGPARM_MAX
6761 /* Unnamed 256bit vector mode parameters are passed on stack. */
6767 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6769 &x86_64_int_parameter_registers
[cum
->regno
],
6774 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6775 enum machine_mode orig_mode
, bool named
,
6776 HOST_WIDE_INT bytes
)
6780 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6781 We use value of -2 to specify that current function call is MSABI. */
6782 if (mode
== VOIDmode
)
6783 return GEN_INT (-2);
6785 /* If we've run out of registers, it goes on the stack. */
6786 if (cum
->nregs
== 0)
6789 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6791 /* Only floating point modes are passed in anything but integer regs. */
6792 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6795 regno
= cum
->regno
+ FIRST_SSE_REG
;
6800 /* Unnamed floating parameters are passed in both the
6801 SSE and integer registers. */
6802 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6803 t2
= gen_rtx_REG (mode
, regno
);
6804 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6805 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6806 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6809 /* Handle aggregated types passed in register. */
6810 if (orig_mode
== BLKmode
)
6812 if (bytes
> 0 && bytes
<= 8)
6813 mode
= (bytes
> 4 ? DImode
: SImode
);
6814 if (mode
== BLKmode
)
6818 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6821 /* Return where to put the arguments to a function.
6822 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6824 MODE is the argument's machine mode. TYPE is the data type of the
6825 argument. It is null for libcalls where that information may not be
6826 available. CUM gives information about the preceding args and about
6827 the function being called. NAMED is nonzero if this argument is a
6828 named parameter (otherwise it is an extra parameter matching an
6832 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6833 const_tree type
, bool named
)
6835 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6836 enum machine_mode mode
= omode
;
6837 HOST_WIDE_INT bytes
, words
;
6840 if (mode
== BLKmode
)
6841 bytes
= int_size_in_bytes (type
);
6843 bytes
= GET_MODE_SIZE (mode
);
6844 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6846 /* To simplify the code below, represent vector types with a vector mode
6847 even if MMX/SSE are not active. */
6848 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6849 mode
= type_natural_mode (type
, cum
);
6851 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6852 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6853 else if (TARGET_64BIT
)
6854 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6856 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6861 /* A C expression that indicates when an argument must be passed by
6862 reference. If nonzero for an argument, a copy of that argument is
6863 made in memory and a pointer to the argument is passed instead of
6864 the argument itself. The pointer is passed in whatever way is
6865 appropriate for passing a pointer to that type. */
6868 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6869 enum machine_mode mode ATTRIBUTE_UNUSED
,
6870 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6872 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6874 /* See Windows x64 Software Convention. */
6875 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6877 int msize
= (int) GET_MODE_SIZE (mode
);
6880 /* Arrays are passed by reference. */
6881 if (TREE_CODE (type
) == ARRAY_TYPE
)
6884 if (AGGREGATE_TYPE_P (type
))
6886 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6887 are passed by reference. */
6888 msize
= int_size_in_bytes (type
);
6892 /* __m128 is passed by reference. */
6894 case 1: case 2: case 4: case 8:
6900 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
6906 /* Return true when TYPE should be 128bit aligned for 32bit argument
6907 passing ABI. XXX: This function is obsolete and is only used for
6908 checking psABI compatibility with previous versions of GCC. */
6911 ix86_compat_aligned_value_p (const_tree type
)
6913 enum machine_mode mode
= TYPE_MODE (type
);
6914 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
6918 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
6920 if (TYPE_ALIGN (type
) < 128)
6923 if (AGGREGATE_TYPE_P (type
))
6925 /* Walk the aggregates recursively. */
6926 switch (TREE_CODE (type
))
6930 case QUAL_UNION_TYPE
:
6934 /* Walk all the structure fields. */
6935 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6937 if (TREE_CODE (field
) == FIELD_DECL
6938 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
6945 /* Just for use if some languages passes arrays by value. */
6946 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
6957 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6958 XXX: This function is obsolete and is only used for checking psABI
6959 compatibility with previous versions of GCC. */
6962 ix86_compat_function_arg_boundary (enum machine_mode mode
,
6963 const_tree type
, unsigned int align
)
6965 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6966 natural boundaries. */
6967 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
6969 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6970 make an exception for SSE modes since these require 128bit
6973 The handling here differs from field_alignment. ICC aligns MMX
6974 arguments to 4 byte boundaries, while structure fields are aligned
6975 to 8 byte boundaries. */
6978 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
6979 align
= PARM_BOUNDARY
;
6983 if (!ix86_compat_aligned_value_p (type
))
6984 align
= PARM_BOUNDARY
;
6987 if (align
> BIGGEST_ALIGNMENT
)
6988 align
= BIGGEST_ALIGNMENT
;
6992 /* Return true when TYPE should be 128bit aligned for 32bit argument
6996 ix86_contains_aligned_value_p (const_tree type
)
6998 enum machine_mode mode
= TYPE_MODE (type
);
7000 if (mode
== XFmode
|| mode
== XCmode
)
7003 if (TYPE_ALIGN (type
) < 128)
7006 if (AGGREGATE_TYPE_P (type
))
7008 /* Walk the aggregates recursively. */
7009 switch (TREE_CODE (type
))
7013 case QUAL_UNION_TYPE
:
7017 /* Walk all the structure fields. */
7018 for (field
= TYPE_FIELDS (type
);
7020 field
= DECL_CHAIN (field
))
7022 if (TREE_CODE (field
) == FIELD_DECL
7023 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7030 /* Just for use if some languages passes arrays by value. */
7031 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7040 return TYPE_ALIGN (type
) >= 128;
7045 /* Gives the alignment boundary, in bits, of an argument with the
7046 specified mode and type. */
7049 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7054 /* Since the main variant type is used for call, we convert it to
7055 the main variant type. */
7056 type
= TYPE_MAIN_VARIANT (type
);
7057 align
= TYPE_ALIGN (type
);
7060 align
= GET_MODE_ALIGNMENT (mode
);
7061 if (align
< PARM_BOUNDARY
)
7062 align
= PARM_BOUNDARY
;
7066 unsigned int saved_align
= align
;
7070 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7073 if (mode
== XFmode
|| mode
== XCmode
)
7074 align
= PARM_BOUNDARY
;
7076 else if (!ix86_contains_aligned_value_p (type
))
7077 align
= PARM_BOUNDARY
;
7080 align
= PARM_BOUNDARY
;
7085 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7089 inform (input_location
,
7090 "The ABI for passing parameters with %d-byte"
7091 " alignment has changed in GCC 4.6",
7092 align
/ BITS_PER_UNIT
);
7099 /* Return true if N is a possible register number of function value. */
7102 ix86_function_value_regno_p (const unsigned int regno
)
7109 case FIRST_FLOAT_REG
:
7110 /* TODO: The function should depend on current function ABI but
7111 builtins.c would need updating then. Therefore we use the
7113 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7115 return TARGET_FLOAT_RETURNS_IN_80387
;
7121 if (TARGET_MACHO
|| TARGET_64BIT
)
7129 /* Define how to find the value returned by a function.
7130 VALTYPE is the data type of the value (as a tree).
7131 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7132 otherwise, FUNC is 0. */
7135 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7136 const_tree fntype
, const_tree fn
)
7140 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7141 we normally prevent this case when mmx is not available. However
7142 some ABIs may require the result to be returned like DImode. */
7143 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7144 regno
= FIRST_MMX_REG
;
7146 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7147 we prevent this case when sse is not available. However some ABIs
7148 may require the result to be returned like integer TImode. */
7149 else if (mode
== TImode
7150 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7151 regno
= FIRST_SSE_REG
;
7153 /* 32-byte vector modes in %ymm0. */
7154 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7155 regno
= FIRST_SSE_REG
;
7157 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7158 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7159 regno
= FIRST_FLOAT_REG
;
7161 /* Most things go in %eax. */
7164 /* Override FP return register with %xmm0 for local functions when
7165 SSE math is enabled or for functions with sseregparm attribute. */
7166 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7168 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7169 if ((sse_level
>= 1 && mode
== SFmode
)
7170 || (sse_level
== 2 && mode
== DFmode
))
7171 regno
= FIRST_SSE_REG
;
7174 /* OImode shouldn't be used directly. */
7175 gcc_assert (mode
!= OImode
);
7177 return gen_rtx_REG (orig_mode
, regno
);
7181 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7186 /* Handle libcalls, which don't provide a type node. */
7187 if (valtype
== NULL
)
7201 regno
= FIRST_SSE_REG
;
7205 regno
= FIRST_FLOAT_REG
;
7213 return gen_rtx_REG (mode
, regno
);
7215 else if (POINTER_TYPE_P (valtype
))
7217 /* Pointers are always returned in word_mode. */
7221 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7222 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7223 x86_64_int_return_registers
, 0);
7225 /* For zero sized structures, construct_container returns NULL, but we
7226 need to keep rest of compiler happy by returning meaningful value. */
7228 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7234 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7236 unsigned int regno
= AX_REG
;
7240 switch (GET_MODE_SIZE (mode
))
7243 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7244 && !COMPLEX_MODE_P (mode
))
7245 regno
= FIRST_SSE_REG
;
7249 if (mode
== SFmode
|| mode
== DFmode
)
7250 regno
= FIRST_SSE_REG
;
7256 return gen_rtx_REG (orig_mode
, regno
);
7260 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7261 enum machine_mode orig_mode
, enum machine_mode mode
)
7263 const_tree fn
, fntype
;
7266 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7267 fn
= fntype_or_decl
;
7268 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7270 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7271 return function_value_ms_64 (orig_mode
, mode
);
7272 else if (TARGET_64BIT
)
7273 return function_value_64 (orig_mode
, mode
, valtype
);
7275 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7279 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7280 bool outgoing ATTRIBUTE_UNUSED
)
7282 enum machine_mode mode
, orig_mode
;
7284 orig_mode
= TYPE_MODE (valtype
);
7285 mode
= type_natural_mode (valtype
, NULL
);
7286 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7289 /* Pointer function arguments and return values are promoted to
7292 static enum machine_mode
7293 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7294 int *punsignedp
, const_tree fntype
,
7297 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7299 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7302 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7306 /* Return true if a structure, union or array with MODE containing FIELD
7307 should be accessed using BLKmode. */
7310 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7312 /* Union with XFmode must be in BLKmode. */
7313 return (mode
== XFmode
7314 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7315 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7319 ix86_libcall_value (enum machine_mode mode
)
7321 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7324 /* Return true iff type is returned in memory. */
7326 static bool ATTRIBUTE_UNUSED
7327 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7331 if (mode
== BLKmode
)
7334 size
= int_size_in_bytes (type
);
7336 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7339 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7341 /* User-created vectors small enough to fit in EAX. */
7345 /* MMX/3dNow values are returned in MM0,
7346 except when it doesn't exits or the ABI prescribes otherwise. */
7348 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7350 /* SSE values are returned in XMM0, except when it doesn't exist. */
7354 /* AVX values are returned in YMM0, except when it doesn't exist. */
7365 /* OImode shouldn't be used directly. */
7366 gcc_assert (mode
!= OImode
);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7374 int needed_intregs
, needed_sseregs
;
7375 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7378 static bool ATTRIBUTE_UNUSED
7379 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7381 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7383 /* __m128 is returned in xmm0. */
7384 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7385 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7388 /* Otherwise, the size must be exactly in [1248]. */
7389 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7393 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7395 #ifdef SUBTARGET_RETURN_IN_MEMORY
7396 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7398 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7402 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7403 return return_in_memory_ms_64 (type
, mode
);
7405 return return_in_memory_64 (type
, mode
);
7408 return return_in_memory_32 (type
, mode
);
7412 /* When returning SSE vector types, we have a choice of either
7413 (1) being abi incompatible with a -march switch, or
7414 (2) generating an error.
7415 Given no good solution, I think the safest thing is one warning.
7416 The user won't be able to use -Werror, but....
7418 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7419 called in response to actually generating a caller or callee that
7420 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7421 via aggregate_value_p for general type probing from tree-ssa. */
7424 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7426 static bool warnedsse
, warnedmmx
;
7428 if (!TARGET_64BIT
&& type
)
7430 /* Look at the return type of the function, not the function type. */
7431 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7433 if (!TARGET_SSE
&& !warnedsse
)
7436 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7439 warning (0, "SSE vector return without SSE enabled "
7444 if (!TARGET_MMX
&& !warnedmmx
)
7446 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7449 warning (0, "MMX vector return without MMX enabled "
7459 /* Create the va_list data type. */
7461 /* Returns the calling convention specific va_list date type.
7462 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7465 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7467 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7469 /* For i386 we use plain pointer to argument area. */
7470 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7471 return build_pointer_type (char_type_node
);
7473 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7474 type_decl
= build_decl (BUILTINS_LOCATION
,
7475 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7477 f_gpr
= build_decl (BUILTINS_LOCATION
,
7478 FIELD_DECL
, get_identifier ("gp_offset"),
7479 unsigned_type_node
);
7480 f_fpr
= build_decl (BUILTINS_LOCATION
,
7481 FIELD_DECL
, get_identifier ("fp_offset"),
7482 unsigned_type_node
);
7483 f_ovf
= build_decl (BUILTINS_LOCATION
,
7484 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7486 f_sav
= build_decl (BUILTINS_LOCATION
,
7487 FIELD_DECL
, get_identifier ("reg_save_area"),
7490 va_list_gpr_counter_field
= f_gpr
;
7491 va_list_fpr_counter_field
= f_fpr
;
7493 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7494 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7495 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7496 DECL_FIELD_CONTEXT (f_sav
) = record
;
7498 TYPE_STUB_DECL (record
) = type_decl
;
7499 TYPE_NAME (record
) = type_decl
;
7500 TYPE_FIELDS (record
) = f_gpr
;
7501 DECL_CHAIN (f_gpr
) = f_fpr
;
7502 DECL_CHAIN (f_fpr
) = f_ovf
;
7503 DECL_CHAIN (f_ovf
) = f_sav
;
7505 layout_type (record
);
7507 /* The correct type is an array type of one element. */
7508 return build_array_type (record
, build_index_type (size_zero_node
));
7511 /* Setup the builtin va_list data type and for 64-bit the additional
7512 calling convention specific va_list data types. */
7515 ix86_build_builtin_va_list (void)
7517 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7519 /* Initialize abi specific va_list builtin types. */
7523 if (ix86_abi
== MS_ABI
)
7525 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7526 if (TREE_CODE (t
) != RECORD_TYPE
)
7527 t
= build_variant_type_copy (t
);
7528 sysv_va_list_type_node
= t
;
7533 if (TREE_CODE (t
) != RECORD_TYPE
)
7534 t
= build_variant_type_copy (t
);
7535 sysv_va_list_type_node
= t
;
7537 if (ix86_abi
!= MS_ABI
)
7539 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7540 if (TREE_CODE (t
) != RECORD_TYPE
)
7541 t
= build_variant_type_copy (t
);
7542 ms_va_list_type_node
= t
;
7547 if (TREE_CODE (t
) != RECORD_TYPE
)
7548 t
= build_variant_type_copy (t
);
7549 ms_va_list_type_node
= t
;
7556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7559 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7565 /* GPR size of varargs save area. */
7566 if (cfun
->va_list_gpr_size
)
7567 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7569 ix86_varargs_gpr_size
= 0;
7571 /* FPR size of varargs save area. We don't need it if we don't pass
7572 anything in SSE registers. */
7573 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7574 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7576 ix86_varargs_fpr_size
= 0;
7578 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7581 save_area
= frame_pointer_rtx
;
7582 set
= get_varargs_alias_set ();
7584 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7585 if (max
> X86_64_REGPARM_MAX
)
7586 max
= X86_64_REGPARM_MAX
;
7588 for (i
= cum
->regno
; i
< max
; i
++)
7590 mem
= gen_rtx_MEM (word_mode
,
7591 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7592 MEM_NOTRAP_P (mem
) = 1;
7593 set_mem_alias_set (mem
, set
);
7594 emit_move_insn (mem
,
7595 gen_rtx_REG (word_mode
,
7596 x86_64_int_parameter_registers
[i
]));
7599 if (ix86_varargs_fpr_size
)
7601 enum machine_mode smode
;
7604 /* Now emit code to save SSE registers. The AX parameter contains number
7605 of SSE parameter registers used to call this function, though all we
7606 actually check here is the zero/non-zero status. */
7608 label
= gen_label_rtx ();
7609 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7610 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7613 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7614 we used movdqa (i.e. TImode) instead? Perhaps even better would
7615 be if we could determine the real mode of the data, via a hook
7616 into pass_stdarg. Ignore all that for now. */
7618 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7619 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7621 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7622 if (max
> X86_64_SSE_REGPARM_MAX
)
7623 max
= X86_64_SSE_REGPARM_MAX
;
7625 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7627 mem
= plus_constant (Pmode
, save_area
,
7628 i
* 16 + ix86_varargs_gpr_size
);
7629 mem
= gen_rtx_MEM (smode
, mem
);
7630 MEM_NOTRAP_P (mem
) = 1;
7631 set_mem_alias_set (mem
, set
);
7632 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7634 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7642 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7644 alias_set_type set
= get_varargs_alias_set ();
7647 /* Reset to zero, as there might be a sysv vaarg used
7649 ix86_varargs_gpr_size
= 0;
7650 ix86_varargs_fpr_size
= 0;
7652 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7656 mem
= gen_rtx_MEM (Pmode
,
7657 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7658 i
* UNITS_PER_WORD
));
7659 MEM_NOTRAP_P (mem
) = 1;
7660 set_mem_alias_set (mem
, set
);
7662 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7663 emit_move_insn (mem
, reg
);
7668 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7669 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7672 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7673 CUMULATIVE_ARGS next_cum
;
7676 /* This argument doesn't appear to be used anymore. Which is good,
7677 because the old code here didn't suppress rtl generation. */
7678 gcc_assert (!no_rtl
);
7683 fntype
= TREE_TYPE (current_function_decl
);
7685 /* For varargs, we do not want to skip the dummy va_dcl argument.
7686 For stdargs, we do want to skip the last named argument. */
7688 if (stdarg_p (fntype
))
7689 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7692 if (cum
->call_abi
== MS_ABI
)
7693 setup_incoming_varargs_ms_64 (&next_cum
);
7695 setup_incoming_varargs_64 (&next_cum
);
7698 /* Checks if TYPE is of kind va_list char *. */
7701 is_va_list_char_pointer (tree type
)
7705 /* For 32-bit it is always true. */
7708 canonic
= ix86_canonical_va_list_type (type
);
7709 return (canonic
== ms_va_list_type_node
7710 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7713 /* Implement va_start. */
7716 ix86_va_start (tree valist
, rtx nextarg
)
7718 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7719 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7720 tree gpr
, fpr
, ovf
, sav
, t
;
7724 if (flag_split_stack
7725 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7727 unsigned int scratch_regno
;
7729 /* When we are splitting the stack, we can't refer to the stack
7730 arguments using internal_arg_pointer, because they may be on
7731 the old stack. The split stack prologue will arrange to
7732 leave a pointer to the old stack arguments in a scratch
7733 register, which we here copy to a pseudo-register. The split
7734 stack prologue can't set the pseudo-register directly because
7735 it (the prologue) runs before any registers have been saved. */
7737 scratch_regno
= split_stack_prologue_scratch_regno ();
7738 if (scratch_regno
!= INVALID_REGNUM
)
7742 reg
= gen_reg_rtx (Pmode
);
7743 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7746 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7750 push_topmost_sequence ();
7751 emit_insn_after (seq
, entry_of_function ());
7752 pop_topmost_sequence ();
7756 /* Only 64bit target needs something special. */
7757 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7759 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7760 std_expand_builtin_va_start (valist
, nextarg
);
7765 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7766 next
= expand_binop (ptr_mode
, add_optab
,
7767 cfun
->machine
->split_stack_varargs_pointer
,
7768 crtl
->args
.arg_offset_rtx
,
7769 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7770 convert_move (va_r
, next
, 0);
7775 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7776 f_fpr
= DECL_CHAIN (f_gpr
);
7777 f_ovf
= DECL_CHAIN (f_fpr
);
7778 f_sav
= DECL_CHAIN (f_ovf
);
7780 valist
= build_simple_mem_ref (valist
);
7781 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7782 /* The following should be folded into the MEM_REF offset. */
7783 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7785 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7787 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7789 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7792 /* Count number of gp and fp argument registers used. */
7793 words
= crtl
->args
.info
.words
;
7794 n_gpr
= crtl
->args
.info
.regno
;
7795 n_fpr
= crtl
->args
.info
.sse_regno
;
7797 if (cfun
->va_list_gpr_size
)
7799 type
= TREE_TYPE (gpr
);
7800 t
= build2 (MODIFY_EXPR
, type
,
7801 gpr
, build_int_cst (type
, n_gpr
* 8));
7802 TREE_SIDE_EFFECTS (t
) = 1;
7803 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7806 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7808 type
= TREE_TYPE (fpr
);
7809 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7810 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7811 TREE_SIDE_EFFECTS (t
) = 1;
7812 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7815 /* Find the overflow area. */
7816 type
= TREE_TYPE (ovf
);
7817 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7818 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7820 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7821 t
= make_tree (type
, ovf_rtx
);
7823 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7824 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7825 TREE_SIDE_EFFECTS (t
) = 1;
7826 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7828 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7830 /* Find the register save area.
7831 Prologue of the function save it right above stack frame. */
7832 type
= TREE_TYPE (sav
);
7833 t
= make_tree (type
, frame_pointer_rtx
);
7834 if (!ix86_varargs_gpr_size
)
7835 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7836 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7837 TREE_SIDE_EFFECTS (t
) = 1;
7838 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7842 /* Implement va_arg. */
7845 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7848 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7849 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7850 tree gpr
, fpr
, ovf
, sav
, t
;
7852 tree lab_false
, lab_over
= NULL_TREE
;
7857 enum machine_mode nat_mode
;
7858 unsigned int arg_boundary
;
7860 /* Only 64bit target needs something special. */
7861 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7862 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7864 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7865 f_fpr
= DECL_CHAIN (f_gpr
);
7866 f_ovf
= DECL_CHAIN (f_fpr
);
7867 f_sav
= DECL_CHAIN (f_ovf
);
7869 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7870 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7871 valist
= build_va_arg_indirect_ref (valist
);
7872 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7873 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7874 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7876 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7878 type
= build_pointer_type (type
);
7879 size
= int_size_in_bytes (type
);
7880 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7882 nat_mode
= type_natural_mode (type
, NULL
);
7891 /* Unnamed 256bit vector mode parameters are passed on stack. */
7892 if (!TARGET_64BIT_MS_ABI
)
7899 container
= construct_container (nat_mode
, TYPE_MODE (type
),
7900 type
, 0, X86_64_REGPARM_MAX
,
7901 X86_64_SSE_REGPARM_MAX
, intreg
,
7906 /* Pull the value out of the saved registers. */
7908 addr
= create_tmp_var (ptr_type_node
, "addr");
7912 int needed_intregs
, needed_sseregs
;
7914 tree int_addr
, sse_addr
;
7916 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
7917 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
7919 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
7921 need_temp
= (!REG_P (container
)
7922 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
7923 || TYPE_ALIGN (type
) > 128));
7925 /* In case we are passing structure, verify that it is consecutive block
7926 on the register save area. If not we need to do moves. */
7927 if (!need_temp
&& !REG_P (container
))
7929 /* Verify that all registers are strictly consecutive */
7930 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
7934 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7936 rtx slot
= XVECEXP (container
, 0, i
);
7937 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
7938 || INTVAL (XEXP (slot
, 1)) != i
* 16)
7946 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7948 rtx slot
= XVECEXP (container
, 0, i
);
7949 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
7950 || INTVAL (XEXP (slot
, 1)) != i
* 8)
7962 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
7963 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
7966 /* First ensure that we fit completely in registers. */
7969 t
= build_int_cst (TREE_TYPE (gpr
),
7970 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
7971 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
7972 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7973 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7974 gimplify_and_add (t
, pre_p
);
7978 t
= build_int_cst (TREE_TYPE (fpr
),
7979 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
7980 + X86_64_REGPARM_MAX
* 8);
7981 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
7982 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7983 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7984 gimplify_and_add (t
, pre_p
);
7987 /* Compute index to start of area used for integer regs. */
7990 /* int_addr = gpr + sav; */
7991 t
= fold_build_pointer_plus (sav
, gpr
);
7992 gimplify_assign (int_addr
, t
, pre_p
);
7996 /* sse_addr = fpr + sav; */
7997 t
= fold_build_pointer_plus (sav
, fpr
);
7998 gimplify_assign (sse_addr
, t
, pre_p
);
8002 int i
, prev_size
= 0;
8003 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8006 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8007 gimplify_assign (addr
, t
, pre_p
);
8009 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8011 rtx slot
= XVECEXP (container
, 0, i
);
8012 rtx reg
= XEXP (slot
, 0);
8013 enum machine_mode mode
= GET_MODE (reg
);
8019 tree dest_addr
, dest
;
8020 int cur_size
= GET_MODE_SIZE (mode
);
8022 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8023 prev_size
= INTVAL (XEXP (slot
, 1));
8024 if (prev_size
+ cur_size
> size
)
8026 cur_size
= size
- prev_size
;
8027 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8028 if (mode
== BLKmode
)
8031 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8032 if (mode
== GET_MODE (reg
))
8033 addr_type
= build_pointer_type (piece_type
);
8035 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8037 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8040 if (SSE_REGNO_P (REGNO (reg
)))
8042 src_addr
= sse_addr
;
8043 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8047 src_addr
= int_addr
;
8048 src_offset
= REGNO (reg
) * 8;
8050 src_addr
= fold_convert (addr_type
, src_addr
);
8051 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8053 dest_addr
= fold_convert (daddr_type
, addr
);
8054 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8055 if (cur_size
== GET_MODE_SIZE (mode
))
8057 src
= build_va_arg_indirect_ref (src_addr
);
8058 dest
= build_va_arg_indirect_ref (dest_addr
);
8060 gimplify_assign (dest
, src
, pre_p
);
8065 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8066 3, dest_addr
, src_addr
,
8067 size_int (cur_size
));
8068 gimplify_and_add (copy
, pre_p
);
8070 prev_size
+= cur_size
;
8076 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8077 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8078 gimplify_assign (gpr
, t
, pre_p
);
8083 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8084 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8085 gimplify_assign (fpr
, t
, pre_p
);
8088 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8090 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8093 /* ... otherwise out of the overflow area. */
8095 /* When we align parameter on stack for caller, if the parameter
8096 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8097 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8098 here with caller. */
8099 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8100 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8101 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8103 /* Care for on-stack alignment if needed. */
8104 if (arg_boundary
<= 64 || size
== 0)
8108 HOST_WIDE_INT align
= arg_boundary
/ 8;
8109 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8110 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8111 build_int_cst (TREE_TYPE (t
), -align
));
8114 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8115 gimplify_assign (addr
, t
, pre_p
);
8117 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8118 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8121 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8123 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8124 addr
= fold_convert (ptrtype
, addr
);
8127 addr
= build_va_arg_indirect_ref (addr
);
8128 return build_va_arg_indirect_ref (addr
);
8131 /* Return true if OPNUM's MEM should be matched
8132 in movabs* patterns. */
8135 ix86_check_movabs (rtx insn
, int opnum
)
8139 set
= PATTERN (insn
);
8140 if (GET_CODE (set
) == PARALLEL
)
8141 set
= XVECEXP (set
, 0, 0);
8142 gcc_assert (GET_CODE (set
) == SET
);
8143 mem
= XEXP (set
, opnum
);
8144 while (GET_CODE (mem
) == SUBREG
)
8145 mem
= SUBREG_REG (mem
);
8146 gcc_assert (MEM_P (mem
));
8147 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8150 /* Initialize the table of extra 80387 mathematical constants. */
8153 init_ext_80387_constants (void)
8155 static const char * cst
[5] =
8157 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8158 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8159 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8160 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8161 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8165 for (i
= 0; i
< 5; i
++)
8167 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8168 /* Ensure each constant is rounded to XFmode precision. */
8169 real_convert (&ext_80387_constants_table
[i
],
8170 XFmode
, &ext_80387_constants_table
[i
]);
8173 ext_80387_constants_init
= 1;
8176 /* Return non-zero if the constant is something that
8177 can be loaded with a special instruction. */
8180 standard_80387_constant_p (rtx x
)
8182 enum machine_mode mode
= GET_MODE (x
);
8186 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8189 if (x
== CONST0_RTX (mode
))
8191 if (x
== CONST1_RTX (mode
))
8194 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8196 /* For XFmode constants, try to find a special 80387 instruction when
8197 optimizing for size or on those CPUs that benefit from them. */
8199 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8203 if (! ext_80387_constants_init
)
8204 init_ext_80387_constants ();
8206 for (i
= 0; i
< 5; i
++)
8207 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8211 /* Load of the constant -0.0 or -1.0 will be split as
8212 fldz;fchs or fld1;fchs sequence. */
8213 if (real_isnegzero (&r
))
8215 if (real_identical (&r
, &dconstm1
))
8221 /* Return the opcode of the special instruction to be used to load
8225 standard_80387_constant_opcode (rtx x
)
8227 switch (standard_80387_constant_p (x
))
8251 /* Return the CONST_DOUBLE representing the 80387 constant that is
8252 loaded by the specified special instruction. The argument IDX
8253 matches the return value from standard_80387_constant_p. */
8256 standard_80387_constant_rtx (int idx
)
8260 if (! ext_80387_constants_init
)
8261 init_ext_80387_constants ();
8277 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8281 /* Return 1 if X is all 0s and 2 if x is all 1s
8282 in supported SSE/AVX vector mode. */
8285 standard_sse_constant_p (rtx x
)
8287 enum machine_mode mode
= GET_MODE (x
);
8289 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8291 if (vector_all_ones_operand (x
, mode
))
8313 /* Return the opcode of the special instruction to be used to load
8317 standard_sse_constant_opcode (rtx insn
, rtx x
)
8319 switch (standard_sse_constant_p (x
))
8322 switch (get_attr_mode (insn
))
8325 return "%vpxor\t%0, %d0";
8327 return "%vxorpd\t%0, %d0";
8329 return "%vxorps\t%0, %d0";
8332 return "vpxor\t%x0, %x0, %x0";
8334 return "vxorpd\t%x0, %x0, %x0";
8336 return "vxorps\t%x0, %x0, %x0";
8344 return "vpcmpeqd\t%0, %0, %0";
8346 return "pcmpeqd\t%0, %0";
8354 /* Returns true if OP contains a symbol reference */
8357 symbolic_reference_mentioned_p (rtx op
)
8362 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8365 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8366 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8372 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8373 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8377 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8384 /* Return true if it is appropriate to emit `ret' instructions in the
8385 body of a function. Do this only if the epilogue is simple, needing a
8386 couple of insns. Prior to reloading, we can't tell how many registers
8387 must be saved, so return false then. Return false if there is no frame
8388 marker to de-allocate. */
8391 ix86_can_use_return_insn_p (void)
8393 struct ix86_frame frame
;
8395 if (! reload_completed
|| frame_pointer_needed
)
8398 /* Don't allow more than 32k pop, since that's all we can do
8399 with one instruction. */
8400 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8403 ix86_compute_frame_layout (&frame
);
8404 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8405 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8408 /* Value should be nonzero if functions must have frame pointers.
8409 Zero means the frame pointer need not be set up (and parms may
8410 be accessed via the stack pointer) in functions that seem suitable. */
8413 ix86_frame_pointer_required (void)
8415 /* If we accessed previous frames, then the generated code expects
8416 to be able to access the saved ebp value in our frame. */
8417 if (cfun
->machine
->accesses_prev_frame
)
8420 /* Several x86 os'es need a frame pointer for other reasons,
8421 usually pertaining to setjmp. */
8422 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8425 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8426 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8429 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8430 allocation is 4GB. */
8431 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8434 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8435 turns off the frame pointer by default. Turn it back on now if
8436 we've not got a leaf function. */
8437 if (TARGET_OMIT_LEAF_FRAME_POINTER
8439 || ix86_current_function_calls_tls_descriptor
))
8442 if (crtl
->profile
&& !flag_fentry
)
8448 /* Record that the current function accesses previous call frames. */
8451 ix86_setup_frame_addresses (void)
8453 cfun
->machine
->accesses_prev_frame
= 1;
8456 #ifndef USE_HIDDEN_LINKONCE
8457 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8458 # define USE_HIDDEN_LINKONCE 1
8460 # define USE_HIDDEN_LINKONCE 0
8464 static int pic_labels_used
;
8466 /* Fills in the label name that should be used for a pc thunk for
8467 the given register. */
8470 get_pc_thunk_name (char name
[32], unsigned int regno
)
8472 gcc_assert (!TARGET_64BIT
);
8474 if (USE_HIDDEN_LINKONCE
)
8475 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8477 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8481 /* This function generates code for -fpic that loads %ebx with
8482 the return address of the caller and then returns. */
8485 ix86_code_end (void)
8490 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8495 if (!(pic_labels_used
& (1 << regno
)))
8498 get_pc_thunk_name (name
, regno
);
8500 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8501 get_identifier (name
),
8502 build_function_type_list (void_type_node
, NULL_TREE
));
8503 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8504 NULL_TREE
, void_type_node
);
8505 TREE_PUBLIC (decl
) = 1;
8506 TREE_STATIC (decl
) = 1;
8507 DECL_IGNORED_P (decl
) = 1;
8512 switch_to_section (darwin_sections
[text_coal_section
]);
8513 fputs ("\t.weak_definition\t", asm_out_file
);
8514 assemble_name (asm_out_file
, name
);
8515 fputs ("\n\t.private_extern\t", asm_out_file
);
8516 assemble_name (asm_out_file
, name
);
8517 putc ('\n', asm_out_file
);
8518 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8519 DECL_WEAK (decl
) = 1;
8523 if (USE_HIDDEN_LINKONCE
)
8525 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8527 targetm
.asm_out
.unique_section (decl
, 0);
8528 switch_to_section (get_named_section (decl
, NULL
, 0));
8530 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8531 fputs ("\t.hidden\t", asm_out_file
);
8532 assemble_name (asm_out_file
, name
);
8533 putc ('\n', asm_out_file
);
8534 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8538 switch_to_section (text_section
);
8539 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8542 DECL_INITIAL (decl
) = make_node (BLOCK
);
8543 current_function_decl
= decl
;
8544 init_function_start (decl
);
8545 first_function_block_is_cold
= false;
8546 /* Make sure unwind info is emitted for the thunk if needed. */
8547 final_start_function (emit_barrier (), asm_out_file
, 1);
8549 /* Pad stack IP move with 4 instructions (two NOPs count
8550 as one instruction). */
8551 if (TARGET_PAD_SHORT_FUNCTION
)
8556 fputs ("\tnop\n", asm_out_file
);
8559 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8560 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8561 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8562 fputs ("\tret\n", asm_out_file
);
8563 final_end_function ();
8564 init_insn_lengths ();
8565 free_after_compilation (cfun
);
8567 current_function_decl
= NULL
;
8570 if (flag_split_stack
)
8571 file_end_indicate_split_stack ();
8574 /* Emit code for the SET_GOT patterns. */
8577 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8583 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8585 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8586 xops
[2] = gen_rtx_MEM (Pmode
,
8587 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8588 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8590 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8591 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8592 an unadorned address. */
8593 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8594 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8595 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8599 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8603 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8605 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8608 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8609 is what will be referenced by the Mach-O PIC subsystem. */
8611 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8614 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8615 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8620 get_pc_thunk_name (name
, REGNO (dest
));
8621 pic_labels_used
|= 1 << REGNO (dest
);
8623 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8624 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8625 output_asm_insn ("call\t%X2", xops
);
8626 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8627 is what will be referenced by the Mach-O PIC subsystem. */
8630 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8632 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8633 CODE_LABEL_NUMBER (label
));
8638 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8643 /* Generate an "push" pattern for input ARG. */
8648 struct machine_function
*m
= cfun
->machine
;
8650 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8651 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8652 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8654 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8655 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8657 return gen_rtx_SET (VOIDmode
,
8658 gen_rtx_MEM (word_mode
,
8659 gen_rtx_PRE_DEC (Pmode
,
8660 stack_pointer_rtx
)),
8664 /* Generate an "pop" pattern for input ARG. */
8669 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8670 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8672 return gen_rtx_SET (VOIDmode
,
8674 gen_rtx_MEM (word_mode
,
8675 gen_rtx_POST_INC (Pmode
,
8676 stack_pointer_rtx
)));
8679 /* Return >= 0 if there is an unused call-clobbered register available
8680 for the entire function. */
8683 ix86_select_alt_pic_regnum (void)
8687 && !ix86_current_function_calls_tls_descriptor
)
8690 /* Can't use the same register for both PIC and DRAP. */
8692 drap
= REGNO (crtl
->drap_reg
);
8695 for (i
= 2; i
>= 0; --i
)
8696 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8700 return INVALID_REGNUM
;
8703 /* Return TRUE if we need to save REGNO. */
8706 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8708 if (pic_offset_table_rtx
8709 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8710 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8712 || crtl
->calls_eh_return
8713 || crtl
->uses_const_pool
))
8714 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8716 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8721 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8722 if (test
== INVALID_REGNUM
)
8729 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8732 return (df_regs_ever_live_p (regno
)
8733 && !call_used_regs
[regno
]
8734 && !fixed_regs
[regno
]
8735 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8738 /* Return number of saved general prupose registers. */
8741 ix86_nsaved_regs (void)
8746 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8747 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8752 /* Return number of saved SSE registrers. */
8755 ix86_nsaved_sseregs (void)
8760 if (!TARGET_64BIT_MS_ABI
)
8762 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8763 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8768 /* Given FROM and TO register numbers, say whether this elimination is
8769 allowed. If stack alignment is needed, we can only replace argument
8770 pointer with hard frame pointer, or replace frame pointer with stack
8771 pointer. Otherwise, frame pointer elimination is automatically
8772 handled and all other eliminations are valid. */
8775 ix86_can_eliminate (const int from
, const int to
)
8777 if (stack_realign_fp
)
8778 return ((from
== ARG_POINTER_REGNUM
8779 && to
== HARD_FRAME_POINTER_REGNUM
)
8780 || (from
== FRAME_POINTER_REGNUM
8781 && to
== STACK_POINTER_REGNUM
));
8783 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8786 /* Return the offset between two registers, one to be eliminated, and the other
8787 its replacement, at the start of a routine. */
8790 ix86_initial_elimination_offset (int from
, int to
)
8792 struct ix86_frame frame
;
8793 ix86_compute_frame_layout (&frame
);
8795 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8796 return frame
.hard_frame_pointer_offset
;
8797 else if (from
== FRAME_POINTER_REGNUM
8798 && to
== HARD_FRAME_POINTER_REGNUM
)
8799 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8802 gcc_assert (to
== STACK_POINTER_REGNUM
);
8804 if (from
== ARG_POINTER_REGNUM
)
8805 return frame
.stack_pointer_offset
;
8807 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8808 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8812 /* In a dynamically-aligned function, we can't know the offset from
8813 stack pointer to frame pointer, so we must ensure that setjmp
8814 eliminates fp against the hard fp (%ebp) rather than trying to
8815 index from %esp up to the top of the frame across a gap that is
8816 of unknown (at compile-time) size. */
8818 ix86_builtin_setjmp_frame_value (void)
8820 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8823 /* When using -fsplit-stack, the allocation routines set a field in
8824 the TCB to the bottom of the stack plus this much space, measured
8827 #define SPLIT_STACK_AVAILABLE 256
8829 /* Fill structure ix86_frame about frame of currently computed function. */
/* Fill *FRAME with the layout of the current function's stack frame:
   number of saved GP/SSE registers, save-area offsets, frame/stack
   pointer offsets, red-zone size, and the save_regs_using_mov decision.
   Offsets grow downward from the return address (offset 0 is the word
   above the return address).
   NOTE(review): lines are split/truncated by extraction (braces, some
   conditions and declarations such as `diff' are missing) — verify
   against upstream GCC i386.c before relying on details.  */
8832 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8834 unsigned HOST_WIDE_INT stack_alignment_needed
;
8835 HOST_WIDE_INT offset
;
8836 unsigned HOST_WIDE_INT preferred_alignment
;
8837 HOST_WIDE_INT size
= get_frame_size ();
8838 HOST_WIDE_INT to_allocate
;
8840 frame
->nregs
= ix86_nsaved_regs ();
8841 frame
->nsseregs
= ix86_nsaved_sseregs ();
/* Convert the alignment values from bits to bytes.  */
8843 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8844 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8846 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8847 function prologues and leaf. */
8848 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8849 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
8850 || ix86_current_function_calls_tls_descriptor
))
/* Force a 16-byte (128-bit) boundary for MS ABI non-leaf functions.  */
8852 preferred_alignment
= 16;
8853 stack_alignment_needed
= 16;
8854 crtl
->preferred_stack_boundary
= 128;
8855 crtl
->stack_alignment_needed
= 128;
/* Sanity-check the alignment relationships computed above.  */
8858 gcc_assert (!size
|| stack_alignment_needed
);
8859 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8860 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8862 /* For SEH we have to limit the amount of code movement into the prologue.
8863 At present we do this via a BLOCKAGE, at which point there's very little
8864 scheduling that can be done, which means that there's very little point
8865 in doing anything except PUSHs. */
8867 cfun
->machine
->use_fast_prologue_epilogue
= false;
8869 /* During reload iteration the amount of registers saved can change.
8870 Recompute the value as needed. Do not recompute when amount of registers
8871 didn't change as reload does multiple calls to the function and does not
8872 expect the decision to change within single iteration. */
8873 else if (!optimize_function_for_size_p (cfun
)
8874 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8876 int count
= frame
->nregs
;
8877 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8879 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
8881 /* The fast prologue uses move instead of push to save registers. This
8882 is significantly longer, but also executes faster as modern hardware
8883 can execute the moves in parallel, but can't do that for push/pop.
8885 Be careful about choosing what prologue to emit: When function takes
8886 many instructions to execute we may use slow version as well as in
8887 case function is known to be outside hot spot (this is known with
8888 feedback only). Weight the size of function by number of registers
8889 to save as it is cheap to use one or two push instructions but very
8890 slow to use many of them. */
8892 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
8893 if (node
->frequency
< NODE_FREQUENCY_NORMAL
8894 || (flag_branch_probabilities
8895 && node
->frequency
< NODE_FREQUENCY_HOT
))
8896 cfun
->machine
->use_fast_prologue_epilogue
= false;
8898 cfun
->machine
->use_fast_prologue_epilogue
8899 = !expensive_function_p (count
);
8902 frame
->save_regs_using_mov
8903 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
8904 /* If static stack checking is enabled and done with probes,
8905 the registers need to be saved before allocating the frame. */
8906 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
8908 /* Skip return address. */
8909 offset
= UNITS_PER_WORD
;
8911 /* Skip pushed static chain. */
8912 if (ix86_static_chain_on_stack
)
8913 offset
+= UNITS_PER_WORD
;
8915 /* Skip saved base pointer. */
8916 if (frame_pointer_needed
)
8917 offset
+= UNITS_PER_WORD
;
8918 frame
->hfp_save_offset
= offset
;
8920 /* The traditional frame pointer location is at the top of the frame. */
8921 frame
->hard_frame_pointer_offset
= offset
;
8923 /* Register save area */
8924 offset
+= frame
->nregs
* UNITS_PER_WORD
;
8925 frame
->reg_save_offset
= offset
;
8927 /* On SEH target, registers are pushed just before the frame pointer
8930 frame
->hard_frame_pointer_offset
= offset
;
8932 /* Align and set SSE register save area. */
8933 if (frame
->nsseregs
)
8935 /* The only ABI that has saved SSE registers (Win64) also has a
8936 16-byte aligned default stack, and thus we don't need to be
8937 within the re-aligned local stack frame to save them. */
8938 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
8939 offset
= (offset
+ 16 - 1) & -16;
8940 offset
+= frame
->nsseregs
* 16;
8942 frame
->sse_reg_save_offset
= offset
;
8944 /* The re-aligned stack starts here. Values before this point are not
8945 directly comparable with values below this point. In order to make
8946 sure that no value happens to be the same before and after, force
8947 the alignment computation below to add a non-zero value. */
8948 if (stack_realign_fp
)
8949 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
/* Varargs register-save area (GPR + FPR save words).  */
8952 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
8953 offset
+= frame
->va_arg_size
;
8955 /* Align start of frame for local function. */
8956 if (stack_realign_fp
8957 || offset
!= frame
->sse_reg_save_offset
8960 || cfun
->calls_alloca
8961 || ix86_current_function_calls_tls_descriptor
)
8962 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
8964 /* Frame pointer points here. */
8965 frame
->frame_pointer_offset
= offset
;
8969 /* Add outgoing arguments area. Can be skipped if we eliminated
8970 all the function calls as dead code.
8971 Skipping is however impossible when function calls alloca. Alloca
8972 expander assumes that last crtl->outgoing_args_size
8973 of stack frame are unused. */
8974 if (ACCUMULATE_OUTGOING_ARGS
8975 && (!crtl
->is_leaf
|| cfun
->calls_alloca
8976 || ix86_current_function_calls_tls_descriptor
))
8978 offset
+= crtl
->outgoing_args_size
;
8979 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
8982 frame
->outgoing_arguments_size
= 0;
8984 /* Align stack boundary. Only needed if we're calling another function
8986 if (!crtl
->is_leaf
|| cfun
->calls_alloca
8987 || ix86_current_function_calls_tls_descriptor
)
8988 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
8990 /* We've reached end of stack frame. */
8991 frame
->stack_pointer_offset
= offset
;
8993 /* Size prologue needs to allocate. */
8994 to_allocate
= offset
- frame
->sse_reg_save_offset
;
/* Moves are not worthwhile for tiny frames, and the mov offsets cannot
   be encoded for 64-bit allocations >= 2GB.  */
8996 if ((!to_allocate
&& frame
->nregs
<= 1)
8997 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
8998 frame
->save_regs_using_mov
= false;
/* Use the red zone (area below the stack pointer that leaf functions may
   use without adjusting %rsp) when legal, capped at its usable size.  */
9000 if (ix86_using_red_zone ()
9001 && crtl
->sp_is_unchanging
9003 && !ix86_current_function_calls_tls_descriptor
)
9005 frame
->red_zone_size
= to_allocate
;
9006 if (frame
->save_regs_using_mov
)
9007 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9008 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9009 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9012 frame
->red_zone_size
= 0;
9013 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9015 /* The SEH frame pointer location is near the bottom of the frame.
9016 This is enforced by the fact that the difference between the
9017 stack pointer and the frame pointer is limited to 240 bytes in
9018 the unwind data structure. */
9023 /* If we can leave the frame pointer where it is, do so. Also, returns
9024 the establisher frame for __builtin_frame_address (0). */
9025 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9026 if (diff
<= SEH_MAX_FRAME_SIZE
9027 && (diff
> 240 || (diff
& 15) != 0)
9028 && !crtl
->accesses_prior_frames
)
9030 /* Ideally we'd determine what portion of the local stack frame
9031 (within the constraint of the lowest 240) is most heavily used.
9032 But without that complication, simply bias the frame pointer
9033 by 128 bytes so as to maximize the amount of the local stack
9034 frame that is addressable with 8-bit offsets. */
9035 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9040 /* This is semi-inlined memory_address_length, but simplified
9041 since we know that we're always dealing with reg+offset, and
9042 to avoid having to create and discard all that rtl. */
/* Estimate the x86 address-encoding length (in extra bytes beyond the
   ModRM byte) for a reg+offset address using REGNO as base and OFFSET
   as displacement, per the comment above ("semi-inlined
   memory_address_length").
   NOTE(review): declarations of `len' and the return statements are
   missing from this extraction — verify against upstream i386.c.  */
9045 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9051 /* EBP and R13 cannot be encoded without an offset. */
9052 len
= (regno
== BP_REG
|| regno
== R13_REG
);
/* An 8-bit displacement fits in one byte.  */
9054 else if (IN_RANGE (offset
, -128, 127))
9057 /* ESP and R12 must be encoded with a SIB byte. */
9058 if (regno
== SP_REG
|| regno
== R12_REG
)
9064 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9065 The valid base registers are taken from CFUN->MACHINE->FS. */
/* Return an address RTX for CFA_OFFSET within the stack frame, choosing
   a base register among the currently-valid ones recorded in
   cfun->machine->fs (frame pointer, DRAP, or stack pointer).  When
   optimizing for speed (use_fast_prologue_epilogue) prefer scheduling
   freedom; otherwise pick the smallest encoding, tie-breaking
   FP > DRAP > SP.
   NOTE(review): lines split/truncated by extraction; declarations of
   `len'/`tlen' and several comparisons are missing — verify against
   upstream i386.c.  */
9068 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9070 const struct machine_function
*m
= cfun
->machine
;
9071 rtx base_reg
= NULL
;
9072 HOST_WIDE_INT base_offset
= 0;
9074 if (m
->use_fast_prologue_epilogue
)
9076 /* Choose the base register most likely to allow the most scheduling
9077 opportunities. Generally FP is valid throughout the function,
9078 while DRAP must be reloaded within the epilogue. But choose either
9079 over the SP due to increased encoding size. */
9083 base_reg
= hard_frame_pointer_rtx
;
9084 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9086 else if (m
->fs
.drap_valid
)
9088 base_reg
= crtl
->drap_reg
;
9089 base_offset
= 0 - cfa_offset
;
9091 else if (m
->fs
.sp_valid
)
9093 base_reg
= stack_pointer_rtx
;
9094 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9099 HOST_WIDE_INT toffset
;
9102 /* Choose the base register with the smallest address encoding.
9103 With a tie, choose FP > DRAP > SP. */
9106 base_reg
= stack_pointer_rtx
;
9107 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9108 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9110 if (m
->fs
.drap_valid
)
9112 toffset
= 0 - cfa_offset
;
9113 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9116 base_reg
= crtl
->drap_reg
;
9117 base_offset
= toffset
;
9123 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9124 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9127 base_reg
= hard_frame_pointer_rtx
;
9128 base_offset
= toffset
;
9133 gcc_assert (base_reg
!= NULL
);
9135 return plus_constant (Pmode
, base_reg
, base_offset
);
9138 /* Emit code to save registers in the prologue. */
/* Emit PUSH insns in the prologue for every call-saved non-SSE register
   that ix86_save_reg says must be saved, iterating hard registers from
   high to low, and mark each push frame-related for dwarf2 CFI.
   NOTE(review): declarations of `regno'/`insn' and braces are missing
   from this extraction — verify against upstream i386.c.  */
9141 ix86_emit_save_regs (void)
9146 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9147 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9149 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9150 RTX_FRAME_RELATED_P (insn
) = 1;
9154 /* Emit a single register save at CFA - CFA_OFFSET. */
/* Save register REGNO (in MODE) to the stack at CFA - CFA_OFFSET using a
   move instruction, choosing the best base address via choose_baseaddr.
   Attaches the appropriate REG_CFA_* note when the store is not directly
   expressible relative to the current CFA register (re-aligned frames,
   DRAP saves).
   NOTE(review): extraction dropped the assignment that sets `base' from
   `addr' before the GET_CODE (base) test — verify against upstream.  */
9157 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9158 HOST_WIDE_INT cfa_offset
)
9160 struct machine_function
*m
= cfun
->machine
;
9161 rtx reg
= gen_rtx_REG (mode
, regno
);
9162 rtx mem
, addr
, base
, insn
;
9164 addr
= choose_baseaddr (cfa_offset
);
9165 mem
= gen_frame_mem (mode
, addr
);
9167 /* For SSE saves, we need to indicate the 128-bit alignment. */
9168 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9170 insn
= emit_move_insn (mem
, reg
);
9171 RTX_FRAME_RELATED_P (insn
) = 1;
/* Strip a displacement to recover the bare base register.  */
9174 if (GET_CODE (base
) == PLUS
)
9175 base
= XEXP (base
, 0);
9176 gcc_checking_assert (REG_P (base
));
9178 /* When saving registers into a re-aligned local stack frame, avoid
9179 any tricky guessing by dwarf2out. */
9180 if (m
->fs
.realigned
)
9182 gcc_checking_assert (stack_realign_drap
);
9184 if (regno
== REGNO (crtl
->drap_reg
))
9186 /* A bit of a hack. We force the DRAP register to be saved in
9187 the re-aligned stack frame, which provides us with a copy
9188 of the CFA that will last past the prologue. Install it. */
9189 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9190 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9191 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9192 mem
= gen_rtx_MEM (mode
, addr
);
9193 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9197 /* The frame pointer is a stable reference within the
9198 aligned frame. Use it. */
9199 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9200 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9201 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9202 mem
= gen_rtx_MEM (mode
, addr
);
9203 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9204 gen_rtx_SET (VOIDmode
, mem
, reg
));
9208 /* The memory may not be relative to the current CFA register,
9209 which means that we may need to generate a new pattern for
9210 use by the unwind info. */
9211 else if (base
!= m
->fs
.cfa_reg
)
9213 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9214 m
->fs
.cfa_offset
- cfa_offset
);
9215 mem
= gen_rtx_MEM (mode
, addr
);
9216 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9220 /* Emit code to save registers using MOV insns.
9221 First register is stored at CFA - CFA_OFFSET. */
/* Save all call-saved non-SSE registers with MOV insns; the first saved
   register goes at CFA - CFA_OFFSET and each subsequent one a word lower.
   NOTE(review): the declaration of `regno' is missing from this
   extraction — verify against upstream i386.c.  */
9223 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9227 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9228 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9230 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9231 cfa_offset
-= UNITS_PER_WORD
;
9235 /* Emit code to save SSE registers using MOV insns.
9236 First register is stored at CFA - CFA_OFFSET. */
/* Save all call-saved SSE registers with 16-byte V4SFmode MOVs; the
   first register is stored at CFA - CFA_OFFSET.
   NOTE(review): the declaration of `regno' and the per-iteration
   cfa_offset adjustment appear dropped by extraction — verify upstream.  */
9238 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9242 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9243 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9245 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
/* GC-rooted list of REG_CFA_RESTORE notes queued until the next
   stack-manipulation insn (see ix86_add_cfa_restore_note).  */
9250 static GTY(()) rtx queued_cfa_restores
;
9252 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9253 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9254 Don't add the note if the previously saved value will be left untouched
9255 within stack red-zone till return, as unwinders can find the same value
9256 in the register and on the stack. */
/* Attach a REG_CFA_RESTORE note for REG to INSN, or queue it on
   queued_cfa_restores.  Per the comment above, the note is skipped
   when the saved value stays untouched in the red zone until return.
   NOTE(review): extraction dropped the early-return and the
   `queued_cfa_restores' assignment's left-hand context — verify
   against upstream i386.c.  */
9259 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9261 if (!crtl
->shrink_wrapped
9262 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9267 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9268 RTX_FRAME_RELATED_P (insn
) = 1;
9272 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9275 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splice any queued REG_CFA_RESTORE notes onto INSN's note list, clear
   the queue, and mark INSN frame-related.  No-op when the queue is empty.
   NOTE(review): the declaration of `last' and the early return are
   missing from this extraction — verify against upstream i386.c.  */
9278 ix86_add_queued_cfa_restore_notes (rtx insn
)
9281 if (!queued_cfa_restores
)
9283 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9285 XEXP (last
, 1) = REG_NOTES (insn
);
9286 REG_NOTES (insn
) = queued_cfa_restores
;
9287 queued_cfa_restores
= NULL_RTX
;
9288 RTX_FRAME_RELATED_P (insn
) = 1;
9291 /* Expand prologue or epilogue stack adjustment.
9292 The pattern exist to put a dependency on all ebp-based memory accesses.
9293 STYLE should be negative if instructions should be marked as frame related,
9294 zero if %r11 register is live and cannot be freely used and positive
/* Emit a prologue/epilogue stack adjustment DEST = SRC + OFFSET using the
   pro_epilogue_adjust_stack patterns (which carry a dependency on
   %ebp-based memory accesses, per the comment above).  STYLE controls
   frame-related marking and whether %r11 is free as a scratch; SET_CFA
   says the adjustment moves the CFA.  Updates the frame-state tracking in
   cfun->machine->fs afterwards.
   NOTE(review): lines split/truncated by extraction (declarations of
   `insn'/`tmp'/`r', several if/else headers and braces missing) —
   verify against upstream i386.c.  */
9298 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9299 int style
, bool set_cfa
)
9301 struct machine_function
*m
= cfun
->machine
;
9303 bool add_frame_related_expr
= false;
9305 if (Pmode
== SImode
)
9306 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9307 else if (x86_64_immediate_operand (offset
, DImode
))
9308 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
/* Offset does not fit a 32-bit immediate: load it into a scratch first.  */
9312 /* r11 is used by indirect sibcall return as well, set before the
9313 epilogue and used after the epilogue. */
9315 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9318 gcc_assert (src
!= hard_frame_pointer_rtx
9319 && dest
!= hard_frame_pointer_rtx
);
9320 tmp
= hard_frame_pointer_rtx
;
9322 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9324 add_frame_related_expr
= true;
9326 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9329 insn
= emit_insn (insn
);
/* Flush queued CFA-restore notes onto this stack-manipulation insn.  */
9331 ix86_add_queued_cfa_restore_notes (insn
);
/* When the adjustment moves the CFA, record the new CFA register/offset
   and emit a REG_CFA_ADJUST_CFA note for the unwinder.  */
9337 gcc_assert (m
->fs
.cfa_reg
== src
);
9338 m
->fs
.cfa_offset
+= INTVAL (offset
);
9339 m
->fs
.cfa_reg
= dest
;
9341 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9342 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9343 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9344 RTX_FRAME_RELATED_P (insn
) = 1;
9348 RTX_FRAME_RELATED_P (insn
) = 1;
9349 if (add_frame_related_expr
)
9351 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9352 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9353 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
/* Track the new stack-pointer offset/validity in m->fs.  */
9357 if (dest
== stack_pointer_rtx
)
9359 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9360 bool valid
= m
->fs
.sp_valid
;
9362 if (src
== hard_frame_pointer_rtx
)
9364 valid
= m
->fs
.fp_valid
;
9365 ooffset
= m
->fs
.fp_offset
;
9367 else if (src
== crtl
->drap_reg
)
9369 valid
= m
->fs
.drap_valid
;
9374 /* Else there are two possibilities: SP itself, which we set
9375 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9376 taken care of this by hand along the eh_return path. */
9377 gcc_checking_assert (src
== stack_pointer_rtx
9378 || offset
== const0_rtx
);
9381 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9382 m
->fs
.sp_valid
= valid
;
9386 /* Find an available register to be used as dynamic realign argument
9387 pointer regsiter. Such a register will be written in prologue and
9388 used in begin of body, so it must not be
9389 1. parameter passing register.
9391 We reuse static-chain register if it is available. Otherwise, we
9392 use DI for i386 and R13 for x86-64. We chose R13 since it has
9395 Return: the regno of chosen register. */
/* Pick the hard register number to serve as the Dynamic Realign Argument
   Pointer (DRAP).  Per the comment above: prefer the static-chain
   register when free; otherwise DI on i386 or R13 on x86-64, avoiding
   caller-saved registers when a static chain or tail call is involved.
   NOTE(review): the return statements and the TARGET_64BIT split are
   missing from this extraction — verify against upstream i386.c.  */
9398 find_drap_reg (void)
9400 tree decl
= cfun
->decl
;
9404 /* Use R13 for nested function or function need static chain.
9405 Since function with tail call may use any caller-saved
9406 registers in epilogue, DRAP must not use caller-saved
9407 register in such case. */
9408 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9415 /* Use DI for nested function or function need static chain.
9416 Since function with tail call may use any caller-saved
9417 registers in epilogue, DRAP must not use caller-saved
9418 register in such case. */
9419 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9422 /* Reuse static chain register if it isn't used for parameter
9424 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9426 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9427 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9434 /* Return minimum incoming stack alignment. */
/* Compute the minimum incoming stack alignment (in bits) for the current
   function, combining the command-line setting, -mstackrealign, the
   force_align_arg_pointer attribute, parm_stack_boundary, and the
   special-case for main().  SIBCALL selects the sibcall variant of the
   -mstackrealign check (per the comment at 9445).
   NOTE(review): the `else if' headers around the -mstackrealign case are
   partially dropped by extraction — verify against upstream i386.c.  */
9437 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9439 unsigned int incoming_stack_boundary
;
9441 /* Prefer the one specified at command line. */
9442 if (ix86_user_incoming_stack_boundary
)
9443 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9444 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9445 if -mstackrealign is used, it isn't used for sibcall check and
9446 estimated stack alignment is 128bit. */
9449 && ix86_force_align_arg_pointer
9450 && crtl
->stack_alignment_estimated
== 128)
9451 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9453 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9455 /* Incoming stack alignment can be changed on individual functions
9456 via force_align_arg_pointer attribute. We use the smallest
9457 incoming stack boundary. */
9458 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9459 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9460 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9461 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9463 /* The incoming stack frame has to be aligned at least at
9464 parm_stack_boundary. */
9465 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9466 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9468 /* Stack at entrance of main is aligned by runtime. We use the
9469 smallest incoming stack boundary. */
9470 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9471 && DECL_NAME (current_function_decl
)
9472 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9473 && DECL_FILE_SCOPE_P (current_function_decl
))
9474 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9476 return incoming_stack_boundary
;
9479 /* Update incoming stack boundary and estimated stack alignment. */
/* Refresh ix86_incoming_stack_boundary and raise the estimated stack
   alignment to 128 bits for the x86-64 varargs register-save area
   (per the comment at 9487).
   NOTE(review): the condition guarding the 128-bit bump is partially
   dropped by extraction — verify against upstream i386.c.  */
9482 ix86_update_stack_boundary (void)
9484 ix86_incoming_stack_boundary
9485 = ix86_minimum_incoming_stack_boundary (false);
9487 /* x86_64 vararg needs 16byte stack alignment for register save
9491 && crtl
->stack_alignment_estimated
< 128)
9492 crtl
->stack_alignment_estimated
= 128;
9495 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9496 needed or an rtx for DRAP otherwise. */
/* TARGET_GET_DRAP_RTX hook: return NULL when no DRAP is needed, else a
   virtual DRAP register.  When stack realignment via DRAP is active,
   records the chosen hard register in crtl->drap_reg, copies it to a
   pseudo at function entry, and attaches a REG_CFA_SET_VDRAP note so the
   unwinder can track the CFA.
   NOTE(review): declarations (`arg_ptr', `seq', `insn', `drap_vreg'),
   the start_sequence/end_sequence bracketing and the returns are missing
   from this extraction — verify against upstream i386.c.  */
9499 ix86_get_drap_rtx (void)
9501 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9502 crtl
->need_drap
= true;
9504 if (stack_realign_drap
)
9506 /* Assign DRAP to vDRAP and returns vDRAP */
9507 unsigned int regno
= find_drap_reg ();
9512 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9513 crtl
->drap_reg
= arg_ptr
;
9516 drap_vreg
= copy_to_reg (arg_ptr
);
9520 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9523 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9524 RTX_FRAME_RELATED_P (insn
) = 1;
9532 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook: incoming arguments are addressed
   via the virtual incoming-args register.  */
9535 ix86_internal_arg_pointer (void)
9537 return virtual_incoming_args_rtx
;
9540 struct scratch_reg
{
9545 /* Return a short-lived scratch register for use on function entry.
9546 In 32-bit mode, it is valid only after the registers are saved
9547 in the prologue. This register must be released by means of
9548 release_scratch_register_on_entry once it is dead. */
/* Select a scratch register valid on function entry and store it in
   SR->reg.  On 64-bit, R11 is always used (comment at 9559).  On 32-bit,
   pick a register not used for parameter passing (regparm/fastcall), not
   the static chain, and not the DRAP; fall back to pushing a call-saved
   register (the push is recorded in SR, released later by
   release_scratch_register_on_entry).
   NOTE(review): the regno assignments for each branch, SR->saved
   bookkeeping and braces are missing from this extraction — verify
   against upstream i386.c.  */
9551 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9559 /* We always use R11 in 64-bit mode. */
9564 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9566 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9567 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9568 int regparm
= ix86_function_regparm (fntype
, decl
);
9570 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9572 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9573 for the static chain register. */
9574 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9575 && drap_regno
!= AX_REG
)
9577 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9579 /* ecx is the static chain register. */
9580 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9581 && drap_regno
!= CX_REG
)
9583 else if (ix86_save_reg (BX_REG
, true))
9585 /* esi is the static chain register. */
9586 else if (!(regparm
== 3 && static_chain_p
)
9587 && ix86_save_reg (SI_REG
, true))
9589 else if (ix86_save_reg (DI_REG
, true))
9593 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9598 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9601 rtx insn
= emit_insn (gen_push (sr
->reg
));
9602 RTX_FRAME_RELATED_P (insn
) = 1;
9606 /* Release a scratch register obtained from the preceding function. */
/* Pop the scratch register saved by get_scratch_register_on_entry,
   emit a REG_FRAME_RELATED_EXPR describing the stack-pointer restore
   (pop is opaque to the CFI machinery, per comment at 9616), and update
   the tracked sp_offset.
   NOTE(review): the guard checking whether the register was actually
   pushed (sr->saved) is missing from this extraction — verify against
   upstream i386.c.  */
9609 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9613 struct machine_function
*m
= cfun
->machine
;
9614 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9616 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9617 RTX_FRAME_RELATED_P (insn
) = 1;
9618 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9619 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9620 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9621 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9625 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9627 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
/* Allocate SIZE bytes of stack while probing each PROBE_INTERVAL page,
   for -fstack-check.  Small constant sizes are unrolled inline; larger
   sizes use the adjust_stack_and_probe loop pattern with a scratch
   register.  Afterwards, a REG_FRAME_RELATED_EXPR note describes the net
   stack adjustment for the unwinder, and sp_offset tracking is updated.
   NOTE(review): lines split/truncated by extraction (if/else headers,
   negation of adjust in the SETs, loop-pattern comments) — verify
   against upstream i386.c.  */
9630 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9632 /* We skip the probe for the first interval + a small dope of 4 words and
9633 probe that many bytes past the specified size to maintain a protection
9634 area at the botton of the stack. */
9635 const int dope
= 4 * UNITS_PER_WORD
;
9636 rtx size_rtx
= GEN_INT (size
), last
;
9638 /* See if we have a constant small number of probes to generate. If so,
9639 that's the easy case. The run-time loop is made up of 11 insns in the
9640 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9641 for n # of intervals. */
9642 if (size
<= 5 * PROBE_INTERVAL
)
9644 HOST_WIDE_INT i
, adjust
;
9645 bool first_probe
= true;
9647 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9648 values of N from 1 until it exceeds SIZE. If only one probe is
9649 needed, this will not generate any code. Then adjust and probe
9650 to PROBE_INTERVAL + SIZE. */
9651 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9655 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9656 first_probe
= false;
9659 adjust
= PROBE_INTERVAL
;
9661 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9662 plus_constant (Pmode
, stack_pointer_rtx
,
9664 emit_stack_probe (stack_pointer_rtx
);
/* Final adjust-and-probe to cover the residual beyond the last
   full interval.  */
9668 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9670 adjust
= size
+ PROBE_INTERVAL
- i
;
9672 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9673 plus_constant (Pmode
, stack_pointer_rtx
,
9675 emit_stack_probe (stack_pointer_rtx
);
9677 /* Adjust back to account for the additional first interval. */
9678 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9679 plus_constant (Pmode
, stack_pointer_rtx
,
9680 PROBE_INTERVAL
+ dope
)));
9683 /* Otherwise, do the same as above, but in a loop. Note that we must be
9684 extra careful with variables wrapping around because we might be at
9685 the very top (or the very bottom) of the address space and we have
9686 to be able to handle this case properly; in particular, we use an
9687 equality test for the loop condition. */
9690 HOST_WIDE_INT rounded_size
;
9691 struct scratch_reg sr
;
9693 get_scratch_register_on_entry (&sr
);
9696 /* Step 1: round SIZE to the previous multiple of the interval. */
9698 rounded_size
= size
& -PROBE_INTERVAL
;
9701 /* Step 2: compute initial and final value of the loop counter. */
9703 /* SP = SP_0 + PROBE_INTERVAL. */
9704 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9705 plus_constant (Pmode
, stack_pointer_rtx
,
9706 - (PROBE_INTERVAL
+ dope
))));
9708 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9709 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9710 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9711 gen_rtx_PLUS (Pmode
, sr
.reg
,
9712 stack_pointer_rtx
)));
9717 while (SP != LAST_ADDR)
9719 SP = SP + PROBE_INTERVAL
9723 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9724 values of N from 1 until it is equal to ROUNDED_SIZE. */
9726 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9729 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9730 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9732 if (size
!= rounded_size
)
9734 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9735 plus_constant (Pmode
, stack_pointer_rtx
,
9736 rounded_size
- size
)));
9737 emit_stack_probe (stack_pointer_rtx
);
9740 /* Adjust back to account for the additional first interval. */
9741 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9742 plus_constant (Pmode
, stack_pointer_rtx
,
9743 PROBE_INTERVAL
+ dope
)));
9745 release_scratch_register_on_entry (&sr
);
9748 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9750 /* Even if the stack pointer isn't the CFA register, we need to correctly
9751 describe the adjustments made to it, in particular differentiate the
9752 frame-related ones from the frame-unrelated ones. */
9755 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9756 XVECEXP (expr
, 0, 0)
9757 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9758 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9759 XVECEXP (expr
, 0, 1)
9760 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9761 plus_constant (Pmode
, stack_pointer_rtx
,
9762 PROBE_INTERVAL
+ dope
+ size
));
9763 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9764 RTX_FRAME_RELATED_P (last
) = 1;
9766 cfun
->machine
->fs
.sp_offset
+= size
;
9769 /* Make sure nothing is scheduled before we are done. */
9770 emit_insn (gen_blockage ());
9773 /* Adjust the stack pointer up to REG while probing it. */
/* Output the assembly for the adjust-stack-and-probe loop: repeatedly
   subtract PROBE_INTERVAL from SP and OR zero into the new top-of-stack
   word until SP reaches the last address held in REG.  Labels are
   generated locally (LPSRL/LPSRE) with a static counter.
   NOTE(review): the xops[1] = reg assignment before the compare appears
   dropped by extraction — verify against upstream i386.c.  */
9776 output_adjust_stack_and_probe (rtx reg
)
9778 static int labelno
= 0;
9779 char loop_lab
[32], end_lab
[32];
9782 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9783 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9785 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9787 /* Jump to END_LAB if SP == LAST_ADDR. */
9788 xops
[0] = stack_pointer_rtx
;
9790 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9791 fputs ("\tje\t", asm_out_file
);
9792 assemble_name_raw (asm_out_file
, end_lab
);
9793 fputc ('\n', asm_out_file
);
9795 /* SP = SP + PROBE_INTERVAL. */
9796 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9797 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
/* Probe the freshly exposed word at (SP).  */
9800 xops
[1] = const0_rtx
;
9801 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9803 fprintf (asm_out_file
, "\tjmp\t");
9804 assemble_name_raw (asm_out_file
, loop_lab
);
9805 fputc ('\n', asm_out_file
);
9807 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9812 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9813 inclusive. These are offsets from the current stack pointer. */
/* Probe (without allocating) every PROBE_INTERVAL page in the range
   [SP + FIRST, SP + FIRST + SIZE].  Small constant sizes are unrolled
   inline; larger sizes use the probe_stack_range loop pattern with a
   scratch register.
   NOTE(review): lines split/truncated by extraction (the loop-counter
   declaration `i', some plus_constant offsets, and the sr.reg operand in
   the final residual probe are missing) — verify against upstream.  */
9816 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9818 /* See if we have a constant small number of probes to generate. If so,
9819 that's the easy case. The run-time loop is made up of 7 insns in the
9820 generic case while the compile-time loop is made up of n insns for n #
9822 if (size
<= 7 * PROBE_INTERVAL
)
9826 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9827 it exceeds SIZE. If only one probe is needed, this will not
9828 generate any code. Then probe at FIRST + SIZE. */
9829 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9830 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9833 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9837 /* Otherwise, do the same as above, but in a loop. Note that we must be
9838 extra careful with variables wrapping around because we might be at
9839 the very top (or the very bottom) of the address space and we have
9840 to be able to handle this case properly; in particular, we use an
9841 equality test for the loop condition. */
9844 HOST_WIDE_INT rounded_size
, last
;
9845 struct scratch_reg sr
;
9847 get_scratch_register_on_entry (&sr
);
9850 /* Step 1: round SIZE to the previous multiple of the interval. */
9852 rounded_size
= size
& -PROBE_INTERVAL
;
9855 /* Step 2: compute initial and final value of the loop counter. */
9857 /* TEST_OFFSET = FIRST. */
9858 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9860 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9861 last
= first
+ rounded_size
;
9866 while (TEST_ADDR != LAST_ADDR)
9868 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9872 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9873 until it is equal to ROUNDED_SIZE. */
9875 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
9878 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9879 that SIZE is equal to ROUNDED_SIZE. */
9881 if (size
!= rounded_size
)
9882 emit_stack_probe (plus_constant (Pmode
,
9883 gen_rtx_PLUS (Pmode
,
9886 rounded_size
- size
));
9888 release_scratch_register_on_entry (&sr
);
9891 /* Make sure nothing is scheduled before we are done. */
9892 emit_insn (gen_blockage ());
9895 /* Probe a range of stack addresses from REG to END, inclusive. These are
9896 offsets from the current stack pointer. */
/* Output the assembly for the probe-stack-range loop: advance the test
   offset in REG by PROBE_INTERVAL and OR zero into (SP + REG) until REG
   reaches END.  Labels are generated locally (LPSRL/LPSRE) with a
   static counter.
   NOTE(review): the xops[0]/xops[1] setup before the compare is dropped
   by extraction — verify against upstream i386.c.  */
9899 output_probe_stack_range (rtx reg
, rtx end
)
9901 static int labelno
= 0;
9902 char loop_lab
[32], end_lab
[32];
9905 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9906 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9908 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9910 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9913 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9914 fputs ("\tje\t", asm_out_file
);
9915 assemble_name_raw (asm_out_file
, end_lab
);
9916 fputc ('\n', asm_out_file
);
9918 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9919 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9920 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9922 /* Probe at TEST_ADDR. */
9923 xops
[0] = stack_pointer_rtx
;
9925 xops
[2] = const0_rtx
;
9926 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
9928 fprintf (asm_out_file
, "\tjmp\t");
9929 assemble_name_raw (asm_out_file
, loop_lab
);
9930 fputc ('\n', asm_out_file
);
9932 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9937 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9938 to be generated in correct form. */
/* Finalize crtl->stack_realign_needed after reload.  If realignment was
   only assumed conservatively and no insn actually needs an aligned
   frame (checked by scanning all insns against the prologue-provided
   registers), drop frame_pointer_needed and the realignment, clamp the
   recorded alignments to the incoming boundary, and rerun df so
   register-liveness reflects the change.
   NOTE(review): lines split/truncated by extraction (several condition
   heads, the FOR_EACH_BB loop header, bb/insn declarations and returns
   are missing) — verify against upstream i386.c.  */
9940 ix86_finalize_stack_realign_flags (void)
9942 /* Check if stack realign is really needed after reload, and
9943 stores result in cfun */
9944 unsigned int incoming_stack_boundary
9945 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
9946 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
9947 unsigned int stack_realign
= (incoming_stack_boundary
9949 ? crtl
->max_used_stack_slot_alignment
9950 : crtl
->stack_alignment_needed
));
9952 if (crtl
->stack_realign_finalized
)
9954 /* After stack_realign_needed is finalized, we can't no longer
9956 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
9960 /* If the only reason for frame_pointer_needed is that we conservatively
9961 assumed stack realignment might be needed, but in the end nothing that
9962 needed the stack alignment had been spilled, clear frame_pointer_needed
9963 and say we don't need stack realignment. */
9966 && frame_pointer_needed
9968 && flag_omit_frame_pointer
9969 && crtl
->sp_is_unchanging
9970 && !ix86_current_function_calls_tls_descriptor
9971 && !crtl
->accesses_prior_frames
9972 && !cfun
->calls_alloca
9973 && !crtl
->calls_eh_return
9974 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
9975 && !ix86_frame_pointer_required ()
9976 && get_frame_size () == 0
9977 && ix86_nsaved_sseregs () == 0
9978 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
9980 HARD_REG_SET set_up_by_prologue
, prologue_used
;
/* Build the set of registers the prologue establishes, then scan all
   insns: any insn that needs a stack frame keeps realignment on.  */
9983 CLEAR_HARD_REG_SET (prologue_used
);
9984 CLEAR_HARD_REG_SET (set_up_by_prologue
);
9985 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
9986 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
9987 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
9988 HARD_FRAME_POINTER_REGNUM
);
9992 FOR_BB_INSNS (bb
, insn
)
9993 if (NONDEBUG_INSN_P (insn
)
9994 && requires_stack_frame_p (insn
, prologue_used
,
9995 set_up_by_prologue
))
9997 crtl
->stack_realign_needed
= stack_realign
;
9998 crtl
->stack_realign_finalized
= true;
/* Nothing needed the aligned frame: cancel realignment and the frame
   pointer, clamp alignments, and refresh dataflow information.  */
10003 frame_pointer_needed
= false;
10004 stack_realign
= false;
10005 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10006 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10007 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10008 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10009 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10010 df_finish_pass (true);
10011 df_scan_alloc (NULL
);
10013 df_compute_regs_ever_live (true);
10017 crtl
->stack_realign_needed
= stack_realign
;
10018 crtl
->stack_realign_finalized
= true;
10021 /* Expand the prologue into a bunch of separate insns. */
10024 ix86_expand_prologue (void)
10026 struct machine_function
*m
= cfun
->machine
;
10029 struct ix86_frame frame
;
10030 HOST_WIDE_INT allocate
;
10031 bool int_registers_saved
;
10032 bool sse_registers_saved
;
10034 ix86_finalize_stack_realign_flags ();
10036 /* DRAP should not coexist with stack_realign_fp */
10037 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10039 memset (&m
->fs
, 0, sizeof (m
->fs
));
10041 /* Initialize CFA state for before the prologue. */
10042 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10043 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10045 /* Track SP offset to the CFA. We continue tracking this after we've
10046 swapped the CFA register away from SP. In the case of re-alignment
10047 this is fudged; we're interested to offsets within the local frame. */
10048 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10049 m
->fs
.sp_valid
= true;
10051 ix86_compute_frame_layout (&frame
);
10053 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10055 /* We should have already generated an error for any use of
10056 ms_hook on a nested function. */
10057 gcc_checking_assert (!ix86_static_chain_on_stack
);
10059 /* Check if profiling is active and we shall use profiling before
10060 prologue variant. If so sorry. */
10061 if (crtl
->profile
&& flag_fentry
!= 0)
10062 sorry ("ms_hook_prologue attribute isn%'t compatible "
10063 "with -mfentry for 32-bit");
10065 /* In ix86_asm_output_function_label we emitted:
10066 8b ff movl.s %edi,%edi
10068 8b ec movl.s %esp,%ebp
10070 This matches the hookable function prologue in Win32 API
10071 functions in Microsoft Windows XP Service Pack 2 and newer.
10072 Wine uses this to enable Windows apps to hook the Win32 API
10073 functions provided by Wine.
10075 What that means is that we've already set up the frame pointer. */
10077 if (frame_pointer_needed
10078 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10082 /* We've decided to use the frame pointer already set up.
10083 Describe this to the unwinder by pretending that both
10084 push and mov insns happen right here.
10086 Putting the unwind info here at the end of the ms_hook
10087 is done so that we can make absolutely certain we get
10088 the required byte sequence at the start of the function,
10089 rather than relying on an assembler that can produce
10090 the exact encoding required.
10092 However it does mean (in the unpatched case) that we have
10093 a 1 insn window where the asynchronous unwind info is
10094 incorrect. However, if we placed the unwind info at
10095 its correct location we would have incorrect unwind info
10096 in the patched case. Which is probably all moot since
10097 I don't expect Wine generates dwarf2 unwind info for the
10098 system libraries that use this feature. */
10100 insn
= emit_insn (gen_blockage ());
10102 push
= gen_push (hard_frame_pointer_rtx
);
10103 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10104 stack_pointer_rtx
);
10105 RTX_FRAME_RELATED_P (push
) = 1;
10106 RTX_FRAME_RELATED_P (mov
) = 1;
10108 RTX_FRAME_RELATED_P (insn
) = 1;
10109 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10110 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10112 /* Note that gen_push incremented m->fs.cfa_offset, even
10113 though we didn't emit the push insn here. */
10114 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10115 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10116 m
->fs
.fp_valid
= true;
10120 /* The frame pointer is not needed so pop %ebp again.
10121 This leaves us with a pristine state. */
10122 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10126 /* The first insn of a function that accepts its static chain on the
10127 stack is to push the register that would be filled in by a direct
10128 call. This insn will be skipped by the trampoline. */
10129 else if (ix86_static_chain_on_stack
)
10131 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10132 emit_insn (gen_blockage ());
10134 /* We don't want to interpret this push insn as a register save,
10135 only as a stack adjustment. The real copy of the register as
10136 a save will be done later, if needed. */
10137 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10138 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10139 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10140 RTX_FRAME_RELATED_P (insn
) = 1;
10143 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10144 of DRAP is needed and stack realignment is really needed after reload */
10145 if (stack_realign_drap
)
10147 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10149 /* Only need to push parameter pointer reg if it is caller saved. */
10150 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10152 /* Push arg pointer reg */
10153 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10154 RTX_FRAME_RELATED_P (insn
) = 1;
10157 /* Grab the argument pointer. */
10158 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10159 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10160 RTX_FRAME_RELATED_P (insn
) = 1;
10161 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10162 m
->fs
.cfa_offset
= 0;
10164 /* Align the stack. */
10165 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10167 GEN_INT (-align_bytes
)));
10168 RTX_FRAME_RELATED_P (insn
) = 1;
10170 /* Replicate the return address on the stack so that return
10171 address can be reached via (argp - 1) slot. This is needed
10172 to implement macro RETURN_ADDR_RTX and intrinsic function
10173 expand_builtin_return_addr etc. */
10174 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10175 t
= gen_frame_mem (word_mode
, t
);
10176 insn
= emit_insn (gen_push (t
));
10177 RTX_FRAME_RELATED_P (insn
) = 1;
10179 /* For the purposes of frame and register save area addressing,
10180 we've started over with a new frame. */
10181 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10182 m
->fs
.realigned
= true;
10185 int_registers_saved
= (frame
.nregs
== 0);
10186 sse_registers_saved
= (frame
.nsseregs
== 0);
10188 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10190 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10191 slower on all targets. Also sdb doesn't like it. */
10192 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10193 RTX_FRAME_RELATED_P (insn
) = 1;
10195 /* Push registers now, before setting the frame pointer
10197 if (!int_registers_saved
10199 && !frame
.save_regs_using_mov
)
10201 ix86_emit_save_regs ();
10202 int_registers_saved
= true;
10203 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10206 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10208 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10209 RTX_FRAME_RELATED_P (insn
) = 1;
10211 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10212 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10213 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10214 m
->fs
.fp_valid
= true;
10218 if (!int_registers_saved
)
10220 /* If saving registers via PUSH, do so now. */
10221 if (!frame
.save_regs_using_mov
)
10223 ix86_emit_save_regs ();
10224 int_registers_saved
= true;
10225 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10228 /* When using red zone we may start register saving before allocating
10229 the stack frame saving one cycle of the prologue. However, avoid
10230 doing this if we have to probe the stack; at least on x86_64 the
10231 stack probe can turn into a call that clobbers a red zone location. */
10232 else if (ix86_using_red_zone ()
10233 && (! TARGET_STACK_PROBE
10234 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10236 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10237 int_registers_saved
= true;
10241 if (stack_realign_fp
)
10243 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10244 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10246 /* The computation of the size of the re-aligned stack frame means
10247 that we must allocate the size of the register save area before
10248 performing the actual alignment. Otherwise we cannot guarantee
10249 that there's enough storage above the realignment point. */
10250 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10251 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10252 GEN_INT (m
->fs
.sp_offset
10253 - frame
.sse_reg_save_offset
),
10256 /* Align the stack. */
10257 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10259 GEN_INT (-align_bytes
)));
10261 /* For the purposes of register save area addressing, the stack
10262 pointer is no longer valid. As for the value of sp_offset,
10263 see ix86_compute_frame_layout, which we need to match in order
10264 to pass verification of stack_pointer_offset at the end. */
10265 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10266 m
->fs
.sp_valid
= false;
10269 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10271 if (flag_stack_usage_info
)
10273 /* We start to count from ARG_POINTER. */
10274 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10276 /* If it was realigned, take into account the fake frame. */
10277 if (stack_realign_drap
)
10279 if (ix86_static_chain_on_stack
)
10280 stack_size
+= UNITS_PER_WORD
;
10282 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10283 stack_size
+= UNITS_PER_WORD
;
10285 /* This over-estimates by 1 minimal-stack-alignment-unit but
10286 mitigates that by counting in the new return address slot. */
10287 current_function_dynamic_stack_size
10288 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10291 current_function_static_stack_size
= stack_size
;
10294 /* On SEH target with very large frame size, allocate an area to save
10295 SSE registers (as the very large allocation won't be described). */
10297 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10298 && !sse_registers_saved
)
10300 HOST_WIDE_INT sse_size
=
10301 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10303 gcc_assert (int_registers_saved
);
10305 /* No need to do stack checking as the area will be immediately
10307 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10308 GEN_INT (-sse_size
), -1,
10309 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10310 allocate
-= sse_size
;
10311 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10312 sse_registers_saved
= true;
10315 /* The stack has already been decremented by the instruction calling us
10316 so probe if the size is non-negative to preserve the protection area. */
10317 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10319 /* We expect the registers to be saved when probes are used. */
10320 gcc_assert (int_registers_saved
);
10322 if (STACK_CHECK_MOVING_SP
)
10324 ix86_adjust_stack_and_probe (allocate
);
10329 HOST_WIDE_INT size
= allocate
;
10331 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10332 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10334 if (TARGET_STACK_PROBE
)
10335 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10337 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10343 else if (!ix86_target_stack_probe ()
10344 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10346 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10347 GEN_INT (-allocate
), -1,
10348 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10352 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10354 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10355 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10356 bool eax_live
= false;
10357 bool r10_live
= false;
10360 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10361 if (!TARGET_64BIT_MS_ABI
)
10362 eax_live
= ix86_eax_live_at_start_p ();
10364 /* Note that SEH directives need to continue tracking the stack
10365 pointer even after the frame pointer has been set up. */
10368 insn
= emit_insn (gen_push (eax
));
10369 allocate
-= UNITS_PER_WORD
;
10370 if (sp_is_cfa_reg
|| TARGET_SEH
)
10373 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10374 RTX_FRAME_RELATED_P (insn
) = 1;
10380 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10381 insn
= emit_insn (gen_push (r10
));
10382 allocate
-= UNITS_PER_WORD
;
10383 if (sp_is_cfa_reg
|| TARGET_SEH
)
10386 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10387 RTX_FRAME_RELATED_P (insn
) = 1;
10391 emit_move_insn (eax
, GEN_INT (allocate
));
10392 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10394 /* Use the fact that AX still contains ALLOCATE. */
10395 adjust_stack_insn
= (Pmode
== DImode
10396 ? gen_pro_epilogue_adjust_stack_di_sub
10397 : gen_pro_epilogue_adjust_stack_si_sub
);
10399 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10400 stack_pointer_rtx
, eax
));
10402 if (sp_is_cfa_reg
|| TARGET_SEH
)
10405 m
->fs
.cfa_offset
+= allocate
;
10406 RTX_FRAME_RELATED_P (insn
) = 1;
10407 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10408 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10409 plus_constant (Pmode
, stack_pointer_rtx
,
10412 m
->fs
.sp_offset
+= allocate
;
10414 if (r10_live
&& eax_live
)
10416 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10417 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10418 gen_frame_mem (word_mode
, t
));
10419 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10420 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10421 gen_frame_mem (word_mode
, t
));
10423 else if (eax_live
|| r10_live
)
10425 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10426 emit_move_insn (gen_rtx_REG (word_mode
,
10427 (eax_live
? AX_REG
: R10_REG
)),
10428 gen_frame_mem (word_mode
, t
));
10431 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10433 /* If we havn't already set up the frame pointer, do so now. */
10434 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10436 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10437 GEN_INT (frame
.stack_pointer_offset
10438 - frame
.hard_frame_pointer_offset
));
10439 insn
= emit_insn (insn
);
10440 RTX_FRAME_RELATED_P (insn
) = 1;
10441 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10443 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10444 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10445 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10446 m
->fs
.fp_valid
= true;
10449 if (!int_registers_saved
)
10450 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10451 if (!sse_registers_saved
)
10452 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10454 pic_reg_used
= false;
10455 if (pic_offset_table_rtx
10456 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10459 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10461 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10462 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10464 pic_reg_used
= true;
10471 if (ix86_cmodel
== CM_LARGE_PIC
)
10473 rtx label
, tmp_reg
;
10475 gcc_assert (Pmode
== DImode
);
10476 label
= gen_label_rtx ();
10477 emit_label (label
);
10478 LABEL_PRESERVE_P (label
) = 1;
10479 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10480 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10481 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10483 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10484 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10485 pic_offset_table_rtx
, tmp_reg
));
10488 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10492 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10493 RTX_FRAME_RELATED_P (insn
) = 1;
10494 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10498 /* In the pic_reg_used case, make sure that the got load isn't deleted
10499 when mcount needs it. Blockage to avoid call movement across mcount
10500 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10502 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10503 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10505 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10507 /* vDRAP is setup but after reload it turns out stack realign
10508 isn't necessary, here we will emit prologue to setup DRAP
10509 without stack realign adjustment */
10510 t
= choose_baseaddr (0);
10511 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10514 /* Prevent instructions from being scheduled into register save push
10515 sequence when access to the redzone area is done through frame pointer.
10516 The offset between the frame pointer and the stack pointer is calculated
10517 relative to the value of the stack pointer at the end of the function
10518 prologue, and moving instructions that access redzone area via frame
10519 pointer inside push sequence violates this assumption. */
10520 if (frame_pointer_needed
&& frame
.red_zone_size
)
10521 emit_insn (gen_memory_blockage ());
10523 /* Emit cld instruction if stringops are used in the function. */
10524 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10525 emit_insn (gen_cld ());
10527 /* SEH requires that the prologue end within 256 bytes of the start of
10528 the function. Prevent instruction schedules that would extend that.
10529 Further, prevent alloca modifications to the stack pointer from being
10530 combined with prologue modifications. */
10532 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10535 /* Emit code to restore REG using a POP insn. */
10538 ix86_emit_restore_reg_using_pop (rtx reg
)
10540 struct machine_function
*m
= cfun
->machine
;
10541 rtx insn
= emit_insn (gen_pop (reg
));
10543 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10544 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10546 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10547 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10549 /* Previously we'd represented the CFA as an expression
10550 like *(%ebp - 8). We've just popped that value from
10551 the stack, which means we need to reset the CFA to
10552 the drap register. This will remain until we restore
10553 the stack pointer. */
10554 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10555 RTX_FRAME_RELATED_P (insn
) = 1;
10557 /* This means that the DRAP register is valid for addressing too. */
10558 m
->fs
.drap_valid
= true;
10562 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10564 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10565 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10566 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10567 RTX_FRAME_RELATED_P (insn
) = 1;
10569 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10572 /* When the frame pointer is the CFA, and we pop it, we are
10573 swapping back to the stack pointer as the CFA. This happens
10574 for stack frames that don't allocate other data, so we assume
10575 the stack pointer is now pointing at the return address, i.e.
10576 the function entry state, which makes the offset be 1 word. */
10577 if (reg
== hard_frame_pointer_rtx
)
10579 m
->fs
.fp_valid
= false;
10580 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10582 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10583 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10585 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10586 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10587 GEN_INT (m
->fs
.cfa_offset
)));
10588 RTX_FRAME_RELATED_P (insn
) = 1;
10593 /* Emit code to restore saved registers using POP insns. */
10596 ix86_emit_restore_regs_using_pop (void)
10598 unsigned int regno
;
10600 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10601 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10602 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10605 /* Emit code and notes for the LEAVE instruction. */
10608 ix86_emit_leave (void)
10610 struct machine_function
*m
= cfun
->machine
;
10611 rtx insn
= emit_insn (ix86_gen_leave ());
10613 ix86_add_queued_cfa_restore_notes (insn
);
10615 gcc_assert (m
->fs
.fp_valid
);
10616 m
->fs
.sp_valid
= true;
10617 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10618 m
->fs
.fp_valid
= false;
10620 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10622 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10623 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10625 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10626 plus_constant (Pmode
, stack_pointer_rtx
,
10628 RTX_FRAME_RELATED_P (insn
) = 1;
10630 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10634 /* Emit code to restore saved registers using MOV insns.
10635 First register is restored from CFA - CFA_OFFSET. */
10637 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10638 bool maybe_eh_return
)
10640 struct machine_function
*m
= cfun
->machine
;
10641 unsigned int regno
;
10643 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10644 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10646 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10649 mem
= choose_baseaddr (cfa_offset
);
10650 mem
= gen_frame_mem (word_mode
, mem
);
10651 insn
= emit_move_insn (reg
, mem
);
10653 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10655 /* Previously we'd represented the CFA as an expression
10656 like *(%ebp - 8). We've just popped that value from
10657 the stack, which means we need to reset the CFA to
10658 the drap register. This will remain until we restore
10659 the stack pointer. */
10660 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10661 RTX_FRAME_RELATED_P (insn
) = 1;
10663 /* This means that the DRAP register is valid for addressing. */
10664 m
->fs
.drap_valid
= true;
10667 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10669 cfa_offset
-= UNITS_PER_WORD
;
10673 /* Emit code to restore saved registers using MOV insns.
10674 First register is restored from CFA - CFA_OFFSET. */
10676 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10677 bool maybe_eh_return
)
10679 unsigned int regno
;
10681 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10682 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10684 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10687 mem
= choose_baseaddr (cfa_offset
);
10688 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10689 set_mem_align (mem
, 128);
10690 emit_move_insn (reg
, mem
);
10692 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10698 /* Restore function stack, frame, and registers. */
10701 ix86_expand_epilogue (int style
)
10703 struct machine_function
*m
= cfun
->machine
;
10704 struct machine_frame_state frame_state_save
= m
->fs
;
10705 struct ix86_frame frame
;
10706 bool restore_regs_via_mov
;
10709 ix86_finalize_stack_realign_flags ();
10710 ix86_compute_frame_layout (&frame
);
10712 m
->fs
.sp_valid
= (!frame_pointer_needed
10713 || (crtl
->sp_is_unchanging
10714 && !stack_realign_fp
));
10715 gcc_assert (!m
->fs
.sp_valid
10716 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10718 /* The FP must be valid if the frame pointer is present. */
10719 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10720 gcc_assert (!m
->fs
.fp_valid
10721 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10723 /* We must have *some* valid pointer to the stack frame. */
10724 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10726 /* The DRAP is never valid at this point. */
10727 gcc_assert (!m
->fs
.drap_valid
);
10729 /* See the comment about red zone and frame
10730 pointer usage in ix86_expand_prologue. */
10731 if (frame_pointer_needed
&& frame
.red_zone_size
)
10732 emit_insn (gen_memory_blockage ());
10734 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10735 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10737 /* Determine the CFA offset of the end of the red-zone. */
10738 m
->fs
.red_zone_offset
= 0;
10739 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10741 /* The red-zone begins below the return address. */
10742 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10744 /* When the register save area is in the aligned portion of
10745 the stack, determine the maximum runtime displacement that
10746 matches up with the aligned frame. */
10747 if (stack_realign_drap
)
10748 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10752 /* Special care must be taken for the normal return case of a function
10753 using eh_return: the eax and edx registers are marked as saved, but
10754 not restored along this path. Adjust the save location to match. */
10755 if (crtl
->calls_eh_return
&& style
!= 2)
10756 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10758 /* EH_RETURN requires the use of moves to function properly. */
10759 if (crtl
->calls_eh_return
)
10760 restore_regs_via_mov
= true;
10761 /* SEH requires the use of pops to identify the epilogue. */
10762 else if (TARGET_SEH
)
10763 restore_regs_via_mov
= false;
10764 /* If we're only restoring one register and sp is not valid then
10765 using a move instruction to restore the register since it's
10766 less work than reloading sp and popping the register. */
10767 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10768 restore_regs_via_mov
= true;
10769 else if (TARGET_EPILOGUE_USING_MOVE
10770 && cfun
->machine
->use_fast_prologue_epilogue
10771 && (frame
.nregs
> 1
10772 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10773 restore_regs_via_mov
= true;
10774 else if (frame_pointer_needed
10776 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10777 restore_regs_via_mov
= true;
10778 else if (frame_pointer_needed
10779 && TARGET_USE_LEAVE
10780 && cfun
->machine
->use_fast_prologue_epilogue
10781 && frame
.nregs
== 1)
10782 restore_regs_via_mov
= true;
10784 restore_regs_via_mov
= false;
10786 if (restore_regs_via_mov
|| frame
.nsseregs
)
10788 /* Ensure that the entire register save area is addressable via
10789 the stack pointer, if we will restore via sp. */
10791 && m
->fs
.sp_offset
> 0x7fffffff
10792 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10793 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10795 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10796 GEN_INT (m
->fs
.sp_offset
10797 - frame
.sse_reg_save_offset
),
10799 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10803 /* If there are any SSE registers to restore, then we have to do it
10804 via moves, since there's obviously no pop for SSE regs. */
10805 if (frame
.nsseregs
)
10806 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10809 if (restore_regs_via_mov
)
10814 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10816 /* eh_return epilogues need %ecx added to the stack pointer. */
10819 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10821 /* Stack align doesn't work with eh_return. */
10822 gcc_assert (!stack_realign_drap
);
10823 /* Neither does regparm nested functions. */
10824 gcc_assert (!ix86_static_chain_on_stack
);
10826 if (frame_pointer_needed
)
10828 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10829 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10830 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10832 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10833 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10835 /* Note that we use SA as a temporary CFA, as the return
10836 address is at the proper place relative to it. We
10837 pretend this happens at the FP restore insn because
10838 prior to this insn the FP would be stored at the wrong
10839 offset relative to SA, and after this insn we have no
10840 other reasonable register to use for the CFA. We don't
10841 bother resetting the CFA to the SP for the duration of
10842 the return insn. */
10843 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10844 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10845 ix86_add_queued_cfa_restore_notes (insn
);
10846 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10847 RTX_FRAME_RELATED_P (insn
) = 1;
10849 m
->fs
.cfa_reg
= sa
;
10850 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10851 m
->fs
.fp_valid
= false;
10853 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10854 const0_rtx
, style
, false);
10858 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10859 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10860 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10861 ix86_add_queued_cfa_restore_notes (insn
);
10863 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10864 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10866 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10867 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10868 plus_constant (Pmode
, stack_pointer_rtx
,
10870 RTX_FRAME_RELATED_P (insn
) = 1;
10873 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10874 m
->fs
.sp_valid
= true;
10879 /* SEH requires that the function end with (1) a stack adjustment
10880 if necessary, (2) a sequence of pops, and (3) a return or
10881 jump instruction. Prevent insns from the function body from
10882 being scheduled into this sequence. */
10885 /* Prevent a catch region from being adjacent to the standard
10886 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
10887 several other flags that would be interesting to test are
10889 if (flag_non_call_exceptions
)
10890 emit_insn (gen_nops (const1_rtx
));
10892 emit_insn (gen_blockage ());
10895 /* First step is to deallocate the stack frame so that we can
10896 pop the registers. Also do it on SEH target for very large
10897 frame as the emitted instructions aren't allowed by the ABI in
10899 if (!m
->fs
.sp_valid
10901 && (m
->fs
.sp_offset
- frame
.reg_save_offset
10902 >= SEH_MAX_FRAME_SIZE
)))
10904 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
10905 GEN_INT (m
->fs
.fp_offset
10906 - frame
.reg_save_offset
),
10909 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10911 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10912 GEN_INT (m
->fs
.sp_offset
10913 - frame
.reg_save_offset
),
10915 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10918 ix86_emit_restore_regs_using_pop ();
10921 /* If we used a stack pointer and haven't already got rid of it,
10923 if (m
->fs
.fp_valid
)
10925 /* If the stack pointer is valid and pointing at the frame
10926 pointer store address, then we only need a pop. */
10927 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
10928 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10929 /* Leave results in shorter dependency chains on CPUs that are
10930 able to grok it fast. */
10931 else if (TARGET_USE_LEAVE
10932 || optimize_function_for_size_p (cfun
)
10933 || !cfun
->machine
->use_fast_prologue_epilogue
)
10934 ix86_emit_leave ();
10937 pro_epilogue_adjust_stack (stack_pointer_rtx
,
10938 hard_frame_pointer_rtx
,
10939 const0_rtx
, style
, !using_drap
);
10940 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10946 int param_ptr_offset
= UNITS_PER_WORD
;
10949 gcc_assert (stack_realign_drap
);
10951 if (ix86_static_chain_on_stack
)
10952 param_ptr_offset
+= UNITS_PER_WORD
;
10953 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10954 param_ptr_offset
+= UNITS_PER_WORD
;
10956 insn
= emit_insn (gen_rtx_SET
10957 (VOIDmode
, stack_pointer_rtx
,
10958 gen_rtx_PLUS (Pmode
,
10960 GEN_INT (-param_ptr_offset
))));
10961 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10962 m
->fs
.cfa_offset
= param_ptr_offset
;
10963 m
->fs
.sp_offset
= param_ptr_offset
;
10964 m
->fs
.realigned
= false;
10966 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10967 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10968 GEN_INT (param_ptr_offset
)));
10969 RTX_FRAME_RELATED_P (insn
) = 1;
10971 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10972 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
10975 /* At this point the stack pointer must be valid, and we must have
10976 restored all of the registers. We may not have deallocated the
10977 entire stack frame. We've delayed this until now because it may
10978 be possible to merge the local stack deallocation with the
10979 deallocation forced by ix86_static_chain_on_stack. */
10980 gcc_assert (m
->fs
.sp_valid
);
10981 gcc_assert (!m
->fs
.fp_valid
);
10982 gcc_assert (!m
->fs
.realigned
);
10983 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
10985 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10986 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
10990 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10992 /* Sibcall epilogues don't want a return instruction. */
10995 m
->fs
= frame_state_save
;
10999 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11001 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11003 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11004 address, do explicit add, and jump indirectly to the caller. */
11006 if (crtl
->args
.pops_args
>= 65536)
11008 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11011 /* There is no "pascal" calling convention in any 64bit ABI. */
11012 gcc_assert (!TARGET_64BIT
);
11014 insn
= emit_insn (gen_pop (ecx
));
11015 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11016 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11018 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11019 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11020 add_reg_note (insn
, REG_CFA_REGISTER
,
11021 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11022 RTX_FRAME_RELATED_P (insn
) = 1;
11024 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11026 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11029 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11032 emit_jump_insn (gen_simple_return_internal ());
11034 /* Restore the state back to the state from the prologue,
11035 so that it's correct for the next epilogue. */
11036 m
->fs
= frame_state_save
;
11039 /* Reset from the function's potential modifications. */
11042 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11043 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11045 if (pic_offset_table_rtx
)
11046 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11048 /* Mach-O doesn't support labels at the end of objects, so if
11049 it looks like we might want one, insert a NOP. */
11051 rtx insn
= get_last_insn ();
11052 rtx deleted_debug_label
= NULL_RTX
;
11055 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11057 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11058 notes only, instead set their CODE_LABEL_NUMBER to -1,
11059 otherwise there would be code generation differences
11060 in between -g and -g0. */
11061 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11062 deleted_debug_label
= insn
;
11063 insn
= PREV_INSN (insn
);
11068 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11069 fputs ("\tnop\n", file
);
11070 else if (deleted_debug_label
)
11071 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11072 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11073 CODE_LABEL_NUMBER (insn
) = -1;
11079 /* Return a scratch register to use in the split stack prologue. The
11080 split stack prologue is used for -fsplit-stack. It is the first
11081 instructions in the function, even before the regular prologue.
11082 The scratch register can be any caller-saved register which is not
11083 used for parameters or for the static chain. */
11085 static unsigned int
11086 split_stack_prologue_scratch_regno (void)
11095 is_fastcall
= (lookup_attribute ("fastcall",
11096 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11098 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11102 if (DECL_STATIC_CHAIN (cfun
->decl
))
11104 sorry ("-fsplit-stack does not support fastcall with "
11105 "nested function");
11106 return INVALID_REGNUM
;
11110 else if (regparm
< 3)
11112 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11118 sorry ("-fsplit-stack does not support 2 register "
11119 " parameters for a nested function");
11120 return INVALID_REGNUM
;
11127 /* FIXME: We could make this work by pushing a register
11128 around the addition and comparison. */
11129 sorry ("-fsplit-stack does not support 3 register parameters");
11130 return INVALID_REGNUM
;
11135 /* A SYMBOL_REF for the function which allocates new stackspace for
11138 static GTY(()) rtx split_stack_fn
;
11140 /* A SYMBOL_REF for the more stack function when using the large
11143 static GTY(()) rtx split_stack_fn_large
;
11145 /* Handle -fsplit-stack. These are the first instructions in the
11146 function, even before the regular prologue. */
11149 ix86_expand_split_stack_prologue (void)
11151 struct ix86_frame frame
;
11152 HOST_WIDE_INT allocate
;
11153 unsigned HOST_WIDE_INT args_size
;
11154 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11155 rtx scratch_reg
= NULL_RTX
;
11156 rtx varargs_label
= NULL_RTX
;
11159 gcc_assert (flag_split_stack
&& reload_completed
);
11161 ix86_finalize_stack_realign_flags ();
11162 ix86_compute_frame_layout (&frame
);
11163 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11165 /* This is the label we will branch to if we have enough stack
11166 space. We expect the basic block reordering pass to reverse this
11167 branch if optimizing, so that we branch in the unlikely case. */
11168 label
= gen_label_rtx ();
11170 /* We need to compare the stack pointer minus the frame size with
11171 the stack boundary in the TCB. The stack boundary always gives
11172 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11173 can compare directly. Otherwise we need to do an addition. */
11175 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11176 UNSPEC_STACK_CHECK
);
11177 limit
= gen_rtx_CONST (Pmode
, limit
);
11178 limit
= gen_rtx_MEM (Pmode
, limit
);
11179 if (allocate
< SPLIT_STACK_AVAILABLE
)
11180 current
= stack_pointer_rtx
;
11183 unsigned int scratch_regno
;
11186 /* We need a scratch register to hold the stack pointer minus
11187 the required frame size. Since this is the very start of the
11188 function, the scratch register can be any caller-saved
11189 register which is not used for parameters. */
11190 offset
= GEN_INT (- allocate
);
11191 scratch_regno
= split_stack_prologue_scratch_regno ();
11192 if (scratch_regno
== INVALID_REGNUM
)
11194 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11195 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11197 /* We don't use ix86_gen_add3 in this case because it will
11198 want to split to lea, but when not optimizing the insn
11199 will not be split after this point. */
11200 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11201 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11206 emit_move_insn (scratch_reg
, offset
);
11207 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11208 stack_pointer_rtx
));
11210 current
= scratch_reg
;
11213 ix86_expand_branch (GEU
, current
, limit
, label
);
11214 jump_insn
= get_last_insn ();
11215 JUMP_LABEL (jump_insn
) = label
;
11217 /* Mark the jump as very likely to be taken. */
11218 add_reg_note (jump_insn
, REG_BR_PROB
,
11219 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11221 if (split_stack_fn
== NULL_RTX
)
11222 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11223 fn
= split_stack_fn
;
11225 /* Get more stack space. We pass in the desired stack space and the
11226 size of the arguments to copy to the new stack. In 32-bit mode
11227 we push the parameters; __morestack will return on a new stack
11228 anyhow. In 64-bit mode we pass the parameters in r10 and
11230 allocate_rtx
= GEN_INT (allocate
);
11231 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11232 call_fusage
= NULL_RTX
;
11237 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11238 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11240 /* If this function uses a static chain, it will be in %r10.
11241 Preserve it across the call to __morestack. */
11242 if (DECL_STATIC_CHAIN (cfun
->decl
))
11246 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11247 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11248 use_reg (&call_fusage
, rax
);
11251 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11253 HOST_WIDE_INT argval
;
11255 gcc_assert (Pmode
== DImode
);
11256 /* When using the large model we need to load the address
11257 into a register, and we've run out of registers. So we
11258 switch to a different calling convention, and we call a
11259 different function: __morestack_large. We pass the
11260 argument size in the upper 32 bits of r10 and pass the
11261 frame size in the lower 32 bits. */
11262 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11263 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11265 if (split_stack_fn_large
== NULL_RTX
)
11266 split_stack_fn_large
=
11267 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11269 if (ix86_cmodel
== CM_LARGE_PIC
)
11273 label
= gen_label_rtx ();
11274 emit_label (label
);
11275 LABEL_PRESERVE_P (label
) = 1;
11276 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11277 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11278 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11279 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11281 x
= gen_rtx_CONST (Pmode
, x
);
11282 emit_move_insn (reg11
, x
);
11283 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11284 x
= gen_const_mem (Pmode
, x
);
11285 emit_move_insn (reg11
, x
);
11288 emit_move_insn (reg11
, split_stack_fn_large
);
11292 argval
= ((args_size
<< 16) << 16) + allocate
;
11293 emit_move_insn (reg10
, GEN_INT (argval
));
11297 emit_move_insn (reg10
, allocate_rtx
);
11298 emit_move_insn (reg11
, GEN_INT (args_size
));
11299 use_reg (&call_fusage
, reg11
);
11302 use_reg (&call_fusage
, reg10
);
11306 emit_insn (gen_push (GEN_INT (args_size
)));
11307 emit_insn (gen_push (allocate_rtx
));
11309 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11310 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11312 add_function_usage_to (call_insn
, call_fusage
);
11314 /* In order to make call/return prediction work right, we now need
11315 to execute a return instruction. See
11316 libgcc/config/i386/morestack.S for the details on how this works.
11318 For flow purposes gcc must not see this as a return
11319 instruction--we need control flow to continue at the subsequent
11320 label. Therefore, we use an unspec. */
11321 gcc_assert (crtl
->args
.pops_args
< 65536);
11322 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11324 /* If we are in 64-bit mode and this function uses a static chain,
11325 we saved %r10 in %rax before calling _morestack. */
11326 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11327 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11328 gen_rtx_REG (word_mode
, AX_REG
));
11330 /* If this function calls va_start, we need to store a pointer to
11331 the arguments on the old stack, because they may not have been
11332 all copied to the new stack. At this point the old stack can be
11333 found at the frame pointer value used by __morestack, because
11334 __morestack has set that up before calling back to us. Here we
11335 store that pointer in a scratch register, and in
11336 ix86_expand_prologue we store the scratch register in a stack
11338 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11340 unsigned int scratch_regno
;
11344 scratch_regno
= split_stack_prologue_scratch_regno ();
11345 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11346 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11350 return address within this function
11351 return address of caller of this function
11353 So we add three words to get to the stack arguments.
11357 return address within this function
11358 first argument to __morestack
11359 second argument to __morestack
11360 return address of caller of this function
11362 So we add five words to get to the stack arguments.
11364 words
= TARGET_64BIT
? 3 : 5;
11365 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11366 gen_rtx_PLUS (Pmode
, frame_reg
,
11367 GEN_INT (words
* UNITS_PER_WORD
))));
11369 varargs_label
= gen_label_rtx ();
11370 emit_jump_insn (gen_jump (varargs_label
));
11371 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11376 emit_label (label
);
11377 LABEL_NUSES (label
) = 1;
11379 /* If this function calls va_start, we now have to set the scratch
11380 register for the case where we do not call __morestack. In this
11381 case we need to set it based on the stack pointer. */
11382 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11384 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11385 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11386 GEN_INT (UNITS_PER_WORD
))));
11388 emit_label (varargs_label
);
11389 LABEL_NUSES (varargs_label
) = 1;
11393 /* We may have to tell the dataflow pass that the split stack prologue
11394 is initializing a scratch register. */
11397 ix86_live_on_entry (bitmap regs
)
11399 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11401 gcc_assert (flag_split_stack
);
11402 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11406 /* Determine if op is suitable SUBREG RTX for address. */
11409 ix86_address_subreg_operand (rtx op
)
11411 enum machine_mode mode
;
11416 mode
= GET_MODE (op
);
11418 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11421 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11422 failures when the register is one word out of a two word structure. */
11423 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11426 /* Allow only SUBREGs of non-eliminable hard registers. */
11427 return register_no_elim_operand (op
, mode
);
11430 /* Extract the parts of an RTL expression that is a valid memory address
11431 for an instruction. Return 0 if the structure of the address is
11432 grossly off. Return -1 if the address contains ASHIFT, so it is not
11433 strictly valid, but still used for computing length of lea instruction. */
11436 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11438 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11439 rtx base_reg
, index_reg
;
11440 HOST_WIDE_INT scale
= 1;
11441 rtx scale_rtx
= NULL_RTX
;
11444 enum ix86_address_seg seg
= SEG_DEFAULT
;
11446 /* Allow zero-extended SImode addresses,
11447 they will be emitted with addr32 prefix. */
11448 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11450 if (GET_CODE (addr
) == ZERO_EXTEND
11451 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11453 addr
= XEXP (addr
, 0);
11454 if (CONST_INT_P (addr
))
11457 else if (GET_CODE (addr
) == AND
11458 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11460 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11461 if (addr
== NULL_RTX
)
11464 if (CONST_INT_P (addr
))
11469 /* Allow SImode subregs of DImode addresses,
11470 they will be emitted with addr32 prefix. */
11471 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11473 if (GET_CODE (addr
) == SUBREG
11474 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11476 addr
= SUBREG_REG (addr
);
11477 if (CONST_INT_P (addr
))
11484 else if (GET_CODE (addr
) == SUBREG
)
11486 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11491 else if (GET_CODE (addr
) == PLUS
)
11493 rtx addends
[4], op
;
11501 addends
[n
++] = XEXP (op
, 1);
11504 while (GET_CODE (op
) == PLUS
);
11509 for (i
= n
; i
>= 0; --i
)
11512 switch (GET_CODE (op
))
11517 index
= XEXP (op
, 0);
11518 scale_rtx
= XEXP (op
, 1);
11524 index
= XEXP (op
, 0);
11525 tmp
= XEXP (op
, 1);
11526 if (!CONST_INT_P (tmp
))
11528 scale
= INTVAL (tmp
);
11529 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11531 scale
= 1 << scale
;
11536 if (GET_CODE (op
) != UNSPEC
)
11541 if (XINT (op
, 1) == UNSPEC_TP
11542 && TARGET_TLS_DIRECT_SEG_REFS
11543 && seg
== SEG_DEFAULT
)
11544 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11550 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11577 else if (GET_CODE (addr
) == MULT
)
11579 index
= XEXP (addr
, 0); /* index*scale */
11580 scale_rtx
= XEXP (addr
, 1);
11582 else if (GET_CODE (addr
) == ASHIFT
)
11584 /* We're called for lea too, which implements ashift on occasion. */
11585 index
= XEXP (addr
, 0);
11586 tmp
= XEXP (addr
, 1);
11587 if (!CONST_INT_P (tmp
))
11589 scale
= INTVAL (tmp
);
11590 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11592 scale
= 1 << scale
;
11595 else if (CONST_INT_P (addr
))
11597 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11600 /* Constant addresses are sign extended to 64bit, we have to
11601 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11603 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11609 disp
= addr
; /* displacement */
11615 else if (GET_CODE (index
) == SUBREG
11616 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11622 /* Address override works only on the (%reg) part of %fs:(%reg). */
11623 if (seg
!= SEG_DEFAULT
11624 && ((base
&& GET_MODE (base
) != word_mode
)
11625 || (index
&& GET_MODE (index
) != word_mode
)))
11628 /* Extract the integral value of scale. */
11631 if (!CONST_INT_P (scale_rtx
))
11633 scale
= INTVAL (scale_rtx
);
11636 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11637 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11639 /* Avoid useless 0 displacement. */
11640 if (disp
== const0_rtx
&& (base
|| index
))
11643 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11644 if (base_reg
&& index_reg
&& scale
== 1
11645 && (index_reg
== arg_pointer_rtx
11646 || index_reg
== frame_pointer_rtx
11647 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11650 tmp
= base
, base
= index
, index
= tmp
;
11651 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11654 /* Special case: %ebp cannot be encoded as a base without a displacement.
11658 && (base_reg
== hard_frame_pointer_rtx
11659 || base_reg
== frame_pointer_rtx
11660 || base_reg
== arg_pointer_rtx
11661 || (REG_P (base_reg
)
11662 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11663 || REGNO (base_reg
) == R13_REG
))))
11666 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11667 Avoid this by transforming to [%esi+0].
11668 Reload calls address legitimization without cfun defined, so we need
11669 to test cfun for being non-NULL. */
11670 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11671 && base_reg
&& !index_reg
&& !disp
11672 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11675 /* Special case: encode reg+reg instead of reg*2. */
11676 if (!base
&& index
&& scale
== 2)
11677 base
= index
, base_reg
= index_reg
, scale
= 1;
11679 /* Special case: scaling cannot be encoded without base or displacement. */
11680 if (!base
&& !disp
&& index
&& scale
!= 1)
11684 out
->index
= index
;
11686 out
->scale
= scale
;
11692 /* Return cost of the memory address x.
11693 For i386, it is better to use a complex address than let gcc copy
11694 the address into a reg and make a new pseudo. But not if the address
11695 requires to two regs - that would mean more pseudos with longer
11698 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11699 addr_space_t as ATTRIBUTE_UNUSED
,
11700 bool speed ATTRIBUTE_UNUSED
)
11702 struct ix86_address parts
;
11704 int ok
= ix86_decompose_address (x
, &parts
);
11708 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11709 parts
.base
= SUBREG_REG (parts
.base
);
11710 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11711 parts
.index
= SUBREG_REG (parts
.index
);
11713 /* Attempt to minimize number of registers in the address. */
11715 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11717 && (!REG_P (parts
.index
)
11718 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11722 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11724 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11725 && parts
.base
!= parts
.index
)
11728 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11729 since it's predecode logic can't detect the length of instructions
11730 and it degenerates to vector decoded. Increase cost of such
11731 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11732 to split such addresses or even refuse such addresses at all.
11734 Following addressing modes are affected:
11739 The first and last case may be avoidable by explicitly coding the zero in
11740 memory address, but I don't have AMD-K6 machine handy to check this
11744 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11745 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11746 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11752 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11753 this is used for to form addresses to local data when -fPIC is in
11757 darwin_local_data_pic (rtx disp
)
11759 return (GET_CODE (disp
) == UNSPEC
11760 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11763 /* Determine if a given RTX is a valid constant. We already know this
11764 satisfies CONSTANT_P. */
11767 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11769 switch (GET_CODE (x
))
11774 if (GET_CODE (x
) == PLUS
)
11776 if (!CONST_INT_P (XEXP (x
, 1)))
11781 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11784 /* Only some unspecs are valid as "constants". */
11785 if (GET_CODE (x
) == UNSPEC
)
11786 switch (XINT (x
, 1))
11789 case UNSPEC_GOTOFF
:
11790 case UNSPEC_PLTOFF
:
11791 return TARGET_64BIT
;
11793 case UNSPEC_NTPOFF
:
11794 x
= XVECEXP (x
, 0, 0);
11795 return (GET_CODE (x
) == SYMBOL_REF
11796 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11797 case UNSPEC_DTPOFF
:
11798 x
= XVECEXP (x
, 0, 0);
11799 return (GET_CODE (x
) == SYMBOL_REF
11800 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11805 /* We must have drilled down to a symbol. */
11806 if (GET_CODE (x
) == LABEL_REF
)
11808 if (GET_CODE (x
) != SYMBOL_REF
)
11813 /* TLS symbols are never valid. */
11814 if (SYMBOL_REF_TLS_MODEL (x
))
11817 /* DLLIMPORT symbols are never valid. */
11818 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11819 && SYMBOL_REF_DLLIMPORT_P (x
))
11823 /* mdynamic-no-pic */
11824 if (MACHO_DYNAMIC_NO_PIC_P
)
11825 return machopic_symbol_defined_p (x
);
11830 if (GET_MODE (x
) == TImode
11831 && x
!= CONST0_RTX (TImode
)
11837 if (!standard_sse_constant_p (x
))
11844 /* Otherwise we handle everything else in the move patterns. */
11848 /* Determine if it's legal to put X into the constant pool. This
11849 is not possible for the address of thread-local symbols, which
11850 is checked above. */
11853 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11855 /* We can always put integral constants and vectors in memory. */
11856 switch (GET_CODE (x
))
11866 return !ix86_legitimate_constant_p (mode
, x
);
11870 /* Nonzero if the constant value X is a legitimate general operand
11871 when generating PIC code. It is given that flag_pic is on and
11872 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11875 legitimate_pic_operand_p (rtx x
)
11879 switch (GET_CODE (x
))
11882 inner
= XEXP (x
, 0);
11883 if (GET_CODE (inner
) == PLUS
11884 && CONST_INT_P (XEXP (inner
, 1)))
11885 inner
= XEXP (inner
, 0);
11887 /* Only some unspecs are valid as "constants". */
11888 if (GET_CODE (inner
) == UNSPEC
)
11889 switch (XINT (inner
, 1))
11892 case UNSPEC_GOTOFF
:
11893 case UNSPEC_PLTOFF
:
11894 return TARGET_64BIT
;
11896 x
= XVECEXP (inner
, 0, 0);
11897 return (GET_CODE (x
) == SYMBOL_REF
11898 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11899 case UNSPEC_MACHOPIC_OFFSET
:
11900 return legitimate_pic_address_disp_p (x
);
11908 return legitimate_pic_address_disp_p (x
);
11915 /* Determine if a given CONST RTX is a valid memory displacement
11919 legitimate_pic_address_disp_p (rtx disp
)
11923 /* In 64bit mode we can allow direct addresses of symbols and labels
11924 when they are not dynamic symbols. */
11927 rtx op0
= disp
, op1
;
11929 switch (GET_CODE (disp
))
11935 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
11937 op0
= XEXP (XEXP (disp
, 0), 0);
11938 op1
= XEXP (XEXP (disp
, 0), 1);
11939 if (!CONST_INT_P (op1
)
11940 || INTVAL (op1
) >= 16*1024*1024
11941 || INTVAL (op1
) < -16*1024*1024)
11943 if (GET_CODE (op0
) == LABEL_REF
)
11945 if (GET_CODE (op0
) == CONST
11946 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
11947 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
11949 if (GET_CODE (op0
) == UNSPEC
11950 && XINT (op0
, 1) == UNSPEC_PCREL
)
11952 if (GET_CODE (op0
) != SYMBOL_REF
)
11957 /* TLS references should always be enclosed in UNSPEC. */
11958 if (SYMBOL_REF_TLS_MODEL (op0
))
11960 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
11961 && ix86_cmodel
!= CM_LARGE_PIC
)
11969 if (GET_CODE (disp
) != CONST
)
11971 disp
= XEXP (disp
, 0);
11975 /* We are unsafe to allow PLUS expressions. This limit allowed distance
11976 of GOT tables. We should not need these anyway. */
11977 if (GET_CODE (disp
) != UNSPEC
11978 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
11979 && XINT (disp
, 1) != UNSPEC_GOTOFF
11980 && XINT (disp
, 1) != UNSPEC_PCREL
11981 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
11984 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
11985 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
11991 if (GET_CODE (disp
) == PLUS
)
11993 if (!CONST_INT_P (XEXP (disp
, 1)))
11995 disp
= XEXP (disp
, 0);
11999 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12002 if (GET_CODE (disp
) != UNSPEC
)
12005 switch (XINT (disp
, 1))
12010 /* We need to check for both symbols and labels because VxWorks loads
12011 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12013 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12014 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12015 case UNSPEC_GOTOFF
:
12016 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12017 While ABI specify also 32bit relocation but we don't produce it in
12018 small PIC model at all. */
12019 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12020 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12022 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12024 case UNSPEC_GOTTPOFF
:
12025 case UNSPEC_GOTNTPOFF
:
12026 case UNSPEC_INDNTPOFF
:
12029 disp
= XVECEXP (disp
, 0, 0);
12030 return (GET_CODE (disp
) == SYMBOL_REF
12031 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12032 case UNSPEC_NTPOFF
:
12033 disp
= XVECEXP (disp
, 0, 0);
12034 return (GET_CODE (disp
) == SYMBOL_REF
12035 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12036 case UNSPEC_DTPOFF
:
12037 disp
= XVECEXP (disp
, 0, 0);
12038 return (GET_CODE (disp
) == SYMBOL_REF
12039 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12045 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12046 replace the input X, or the original X if no replacement is called for.
12047 The output parameter *WIN is 1 if the calling macro should goto WIN,
12048 0 if it should not. */
12051 ix86_legitimize_reload_address (rtx x
,
12052 enum machine_mode mode ATTRIBUTE_UNUSED
,
12053 int opnum
, int type
,
12054 int ind_levels ATTRIBUTE_UNUSED
)
12056 /* Reload can generate:
12058 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12062 This RTX is rejected from ix86_legitimate_address_p due to
12063 non-strictness of base register 97. Following this rejection,
12064 reload pushes all three components into separate registers,
12065 creating invalid memory address RTX.
12067 Following code reloads only the invalid part of the
12068 memory address RTX. */
12070 if (GET_CODE (x
) == PLUS
12071 && REG_P (XEXP (x
, 1))
12072 && GET_CODE (XEXP (x
, 0)) == PLUS
12073 && REG_P (XEXP (XEXP (x
, 0), 1)))
12076 bool something_reloaded
= false;
12078 base
= XEXP (XEXP (x
, 0), 1);
12079 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12081 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12082 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12083 opnum
, (enum reload_type
) type
);
12084 something_reloaded
= true;
12087 index
= XEXP (x
, 1);
12088 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12090 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12091 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12092 opnum
, (enum reload_type
) type
);
12093 something_reloaded
= true;
12096 gcc_assert (something_reloaded
);
12103 /* Recognizes RTL expressions that are valid memory addresses for an
12104 instruction. The MODE argument is the machine mode for the MEM
12105 expression that wants to use this address.
12107 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12108 convert common non-canonical forms to canonical form so that they will
12112 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12113 rtx addr
, bool strict
)
12115 struct ix86_address parts
;
12116 rtx base
, index
, disp
;
12117 HOST_WIDE_INT scale
;
12119 if (ix86_decompose_address (addr
, &parts
) <= 0)
12120 /* Decomposition failed. */
12124 index
= parts
.index
;
12126 scale
= parts
.scale
;
12128 /* Validate base register. */
12135 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12136 reg
= SUBREG_REG (base
);
12138 /* Base is not a register. */
12141 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12144 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12145 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12146 /* Base is not valid. */
12150 /* Validate index register. */
12157 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12158 reg
= SUBREG_REG (index
);
12160 /* Index is not a register. */
12163 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12166 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12167 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12168 /* Index is not valid. */
12172 /* Index and base should have the same mode. */
12174 && GET_MODE (base
) != GET_MODE (index
))
12177 /* Validate scale factor. */
12181 /* Scale without index. */
12184 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12185 /* Scale is not a valid multiplier. */
12189 /* Validate displacement. */
12192 if (GET_CODE (disp
) == CONST
12193 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12194 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12195 switch (XINT (XEXP (disp
, 0), 1))
12197 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12198 used. While ABI specify also 32bit relocations, we don't produce
12199 them at all and use IP relative instead. */
12201 case UNSPEC_GOTOFF
:
12202 gcc_assert (flag_pic
);
12204 goto is_legitimate_pic
;
12206 /* 64bit address unspec. */
12209 case UNSPEC_GOTPCREL
:
12211 gcc_assert (flag_pic
);
12212 goto is_legitimate_pic
;
12214 case UNSPEC_GOTTPOFF
:
12215 case UNSPEC_GOTNTPOFF
:
12216 case UNSPEC_INDNTPOFF
:
12217 case UNSPEC_NTPOFF
:
12218 case UNSPEC_DTPOFF
:
12221 case UNSPEC_STACK_CHECK
:
12222 gcc_assert (flag_split_stack
);
12226 /* Invalid address unspec. */
12230 else if (SYMBOLIC_CONST (disp
)
12234 && MACHOPIC_INDIRECT
12235 && !machopic_operand_p (disp
)
12241 if (TARGET_64BIT
&& (index
|| base
))
12243 /* foo@dtpoff(%rX) is ok. */
12244 if (GET_CODE (disp
) != CONST
12245 || GET_CODE (XEXP (disp
, 0)) != PLUS
12246 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12247 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12248 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12249 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12250 /* Non-constant pic memory reference. */
12253 else if ((!TARGET_MACHO
|| flag_pic
)
12254 && ! legitimate_pic_address_disp_p (disp
))
12255 /* Displacement is an invalid pic construct. */
12258 else if (MACHO_DYNAMIC_NO_PIC_P
12259 && !ix86_legitimate_constant_p (Pmode
, disp
))
12260 /* displacment must be referenced via non_lazy_pointer */
12264 /* This code used to verify that a symbolic pic displacement
12265 includes the pic_offset_table_rtx register.
12267 While this is good idea, unfortunately these constructs may
12268 be created by "adds using lea" optimization for incorrect
12277 This code is nonsensical, but results in addressing
12278 GOT table with pic_offset_table_rtx base. We can't
12279 just refuse it easily, since it gets matched by
12280 "addsi3" pattern, that later gets split to lea in the
12281 case output register differs from input. While this
12282 can be handled by separate addsi pattern for this case
12283 that never results in lea, this seems to be easier and
12284 correct fix for crash to disable this test. */
12286 else if (GET_CODE (disp
) != LABEL_REF
12287 && !CONST_INT_P (disp
)
12288 && (GET_CODE (disp
) != CONST
12289 || !ix86_legitimate_constant_p (Pmode
, disp
))
12290 && (GET_CODE (disp
) != SYMBOL_REF
12291 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12292 /* Displacement is not constant. */
12294 else if (TARGET_64BIT
12295 && !x86_64_immediate_operand (disp
, VOIDmode
))
12296 /* Displacement is out of range. */
12300 /* Everything looks valid. */
12304 /* Determine if a given RTX is a valid constant address. */
12307 constant_address_p (rtx x
)
12309 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12312 /* Return a unique alias set for the GOT. */
12314 static alias_set_type
12315 ix86_GOT_alias_set (void)
12317 static alias_set_type set
= -1;
12319 set
= new_alias_set ();
12323 /* Return a legitimate reference for ORIG (an address) using the
12324 register REG. If REG is 0, a new pseudo is generated.
12326 There are two types of references that must be handled:
12328 1. Global data references must load the address from the GOT, via
12329 the PIC reg. An insn is emitted to do this load, and the reg is
12332 2. Static data references, constant pool addresses, and code labels
12333 compute the address as an offset from the GOT, whose base is in
12334 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12335 differentiate them from global data objects. The returned
12336 address is the PIC reg + an unspec constant.
12338 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12339 reg also appears in the address. */
12342 legitimize_pic_address (rtx orig
, rtx reg
)
12345 rtx new_rtx
= orig
;
12348 if (TARGET_MACHO
&& !TARGET_64BIT
)
12351 reg
= gen_reg_rtx (Pmode
);
12352 /* Use the generic Mach-O PIC machinery. */
12353 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12357 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12359 else if (TARGET_64BIT
12360 && ix86_cmodel
!= CM_SMALL_PIC
12361 && gotoff_operand (addr
, Pmode
))
12364 /* This symbol may be referenced via a displacement from the PIC
12365 base address (@GOTOFF). */
12367 if (reload_in_progress
)
12368 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12369 if (GET_CODE (addr
) == CONST
)
12370 addr
= XEXP (addr
, 0);
12371 if (GET_CODE (addr
) == PLUS
)
12373 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12375 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12378 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12379 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12381 tmpreg
= gen_reg_rtx (Pmode
);
12384 emit_move_insn (tmpreg
, new_rtx
);
12388 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12389 tmpreg
, 1, OPTAB_DIRECT
);
12392 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12394 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12396 /* This symbol may be referenced via a displacement from the PIC
12397 base address (@GOTOFF). */
12399 if (reload_in_progress
)
12400 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12401 if (GET_CODE (addr
) == CONST
)
12402 addr
= XEXP (addr
, 0);
12403 if (GET_CODE (addr
) == PLUS
)
12405 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12407 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12410 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12411 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12412 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12416 emit_move_insn (reg
, new_rtx
);
12420 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12421 /* We can't use @GOTOFF for text labels on VxWorks;
12422 see gotoff_operand. */
12423 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12425 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12427 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12428 return legitimize_dllimport_symbol (addr
, true);
12429 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12430 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12431 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12433 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12434 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12438 /* For x64 PE-COFF there is no GOT table. So we use address
12440 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12442 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12443 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12446 reg
= gen_reg_rtx (Pmode
);
12447 emit_move_insn (reg
, new_rtx
);
12450 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12452 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12453 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12454 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12455 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12458 reg
= gen_reg_rtx (Pmode
);
12459 /* Use directly gen_movsi, otherwise the address is loaded
12460 into register for CSE. We don't want to CSE this addresses,
12461 instead we CSE addresses from the GOT table, so skip this. */
12462 emit_insn (gen_movsi (reg
, new_rtx
));
12467 /* This symbol must be referenced via a load from the
12468 Global Offset Table (@GOT). */
12470 if (reload_in_progress
)
12471 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12472 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12473 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12475 new_rtx
= force_reg (Pmode
, new_rtx
);
12476 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12477 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12478 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12481 reg
= gen_reg_rtx (Pmode
);
12482 emit_move_insn (reg
, new_rtx
);
12488 if (CONST_INT_P (addr
)
12489 && !x86_64_immediate_operand (addr
, VOIDmode
))
12493 emit_move_insn (reg
, addr
);
12497 new_rtx
= force_reg (Pmode
, addr
);
12499 else if (GET_CODE (addr
) == CONST
)
12501 addr
= XEXP (addr
, 0);
12503 /* We must match stuff we generate before. Assume the only
12504 unspecs that can get here are ours. Not that we could do
12505 anything with them anyway.... */
12506 if (GET_CODE (addr
) == UNSPEC
12507 || (GET_CODE (addr
) == PLUS
12508 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12510 gcc_assert (GET_CODE (addr
) == PLUS
);
12512 if (GET_CODE (addr
) == PLUS
)
12514 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12516 /* Check first to see if this is a constant offset from a @GOTOFF
12517 symbol reference. */
12518 if (gotoff_operand (op0
, Pmode
)
12519 && CONST_INT_P (op1
))
12523 if (reload_in_progress
)
12524 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12525 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12527 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12528 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12529 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12533 emit_move_insn (reg
, new_rtx
);
12539 if (INTVAL (op1
) < -16*1024*1024
12540 || INTVAL (op1
) >= 16*1024*1024)
12542 if (!x86_64_immediate_operand (op1
, Pmode
))
12543 op1
= force_reg (Pmode
, op1
);
12544 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12550 rtx base
= legitimize_pic_address (op0
, reg
);
12551 enum machine_mode mode
= GET_MODE (base
);
12553 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12555 if (CONST_INT_P (new_rtx
))
12557 if (INTVAL (new_rtx
) < -16*1024*1024
12558 || INTVAL (new_rtx
) >= 16*1024*1024)
12560 if (!x86_64_immediate_operand (new_rtx
, mode
))
12561 new_rtx
= force_reg (mode
, new_rtx
);
12563 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12566 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12570 if (GET_CODE (new_rtx
) == PLUS
12571 && CONSTANT_P (XEXP (new_rtx
, 1)))
12573 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12574 new_rtx
= XEXP (new_rtx
, 1);
12576 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12584 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12587 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12589 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12591 if (GET_MODE (tp
) != tp_mode
)
12593 gcc_assert (GET_MODE (tp
) == SImode
);
12594 gcc_assert (tp_mode
== DImode
);
12596 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12600 tp
= copy_to_mode_reg (tp_mode
, tp
);
12605 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12607 static GTY(()) rtx ix86_tls_symbol
;
12610 ix86_tls_get_addr (void)
12612 if (!ix86_tls_symbol
)
12615 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12616 ? "___tls_get_addr" : "__tls_get_addr");
12618 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12621 return ix86_tls_symbol
;
12624 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12626 static GTY(()) rtx ix86_tls_module_base_symbol
;
12629 ix86_tls_module_base (void)
12631 if (!ix86_tls_module_base_symbol
)
12633 ix86_tls_module_base_symbol
12634 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12636 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12637 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12640 return ix86_tls_module_base_symbol
;
12643 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12644 false if we expect this to be used for a memory address and true if
12645 we expect to load the address into a register. */
12648 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12650 rtx dest
, base
, off
;
12651 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12652 enum machine_mode tp_mode
= Pmode
;
12657 case TLS_MODEL_GLOBAL_DYNAMIC
:
12658 dest
= gen_reg_rtx (Pmode
);
12663 pic
= pic_offset_table_rtx
;
12666 pic
= gen_reg_rtx (Pmode
);
12667 emit_insn (gen_set_got (pic
));
12671 if (TARGET_GNU2_TLS
)
12674 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12676 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12678 tp
= get_thread_pointer (Pmode
, true);
12679 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12681 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12685 rtx caddr
= ix86_tls_get_addr ();
12689 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12692 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax
, x
,
12694 insns
= get_insns ();
12697 RTL_CONST_CALL_P (insns
) = 1;
12698 emit_libcall_block (insns
, dest
, rax
, x
);
12701 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12705 case TLS_MODEL_LOCAL_DYNAMIC
:
12706 base
= gen_reg_rtx (Pmode
);
12711 pic
= pic_offset_table_rtx
;
12714 pic
= gen_reg_rtx (Pmode
);
12715 emit_insn (gen_set_got (pic
));
12719 if (TARGET_GNU2_TLS
)
12721 rtx tmp
= ix86_tls_module_base ();
12724 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12726 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12728 tp
= get_thread_pointer (Pmode
, true);
12729 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12730 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12734 rtx caddr
= ix86_tls_get_addr ();
12738 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12741 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax
,
12743 insns
= get_insns ();
12746 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12747 share the LD_BASE result with other LD model accesses. */
12748 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12749 UNSPEC_TLS_LD_BASE
);
12751 RTL_CONST_CALL_P (insns
) = 1;
12752 emit_libcall_block (insns
, base
, rax
, eqv
);
12755 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12758 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12759 off
= gen_rtx_CONST (Pmode
, off
);
12761 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12763 if (TARGET_GNU2_TLS
)
12765 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12767 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12771 case TLS_MODEL_INITIAL_EXEC
:
12774 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12776 /* The Sun linker took the AMD64 TLS spec literally
12777 and can only handle %rax as destination of the
12778 initial executable code sequence. */
12780 dest
= gen_reg_rtx (DImode
);
12781 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12785 /* Generate DImode references to avoid %fs:(%reg32)
12786 problems and linker IE->LE relaxation bug. */
12789 type
= UNSPEC_GOTNTPOFF
;
12793 if (reload_in_progress
)
12794 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12795 pic
= pic_offset_table_rtx
;
12796 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12798 else if (!TARGET_ANY_GNU_TLS
)
12800 pic
= gen_reg_rtx (Pmode
);
12801 emit_insn (gen_set_got (pic
));
12802 type
= UNSPEC_GOTTPOFF
;
12807 type
= UNSPEC_INDNTPOFF
;
12810 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12811 off
= gen_rtx_CONST (tp_mode
, off
);
12813 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12814 off
= gen_const_mem (tp_mode
, off
);
12815 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12817 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12819 base
= get_thread_pointer (tp_mode
,
12820 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12821 off
= force_reg (tp_mode
, off
);
12822 return gen_rtx_PLUS (tp_mode
, base
, off
);
12826 base
= get_thread_pointer (Pmode
, true);
12827 dest
= gen_reg_rtx (Pmode
);
12828 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12832 case TLS_MODEL_LOCAL_EXEC
:
12833 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12834 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12835 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12836 off
= gen_rtx_CONST (Pmode
, off
);
12838 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12840 base
= get_thread_pointer (Pmode
,
12841 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12842 return gen_rtx_PLUS (Pmode
, base
, off
);
12846 base
= get_thread_pointer (Pmode
, true);
12847 dest
= gen_reg_rtx (Pmode
);
12848 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12853 gcc_unreachable ();
12859 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12862 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12863 htab_t dllimport_map
;
12866 get_dllimport_decl (tree decl
)
12868 struct tree_map
*h
, in
;
12871 const char *prefix
;
12872 size_t namelen
, prefixlen
;
12877 if (!dllimport_map
)
12878 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
12880 in
.hash
= htab_hash_pointer (decl
);
12881 in
.base
.from
= decl
;
12882 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12883 h
= (struct tree_map
*) *loc
;
12887 *loc
= h
= ggc_alloc_tree_map ();
12889 h
->base
.from
= decl
;
12890 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
12891 VAR_DECL
, NULL
, ptr_type_node
);
12892 DECL_ARTIFICIAL (to
) = 1;
12893 DECL_IGNORED_P (to
) = 1;
12894 DECL_EXTERNAL (to
) = 1;
12895 TREE_READONLY (to
) = 1;
12897 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
12898 name
= targetm
.strip_name_encoding (name
);
12899 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
12900 ? "*__imp_" : "*__imp__";
12901 namelen
= strlen (name
);
12902 prefixlen
= strlen (prefix
);
12903 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
12904 memcpy (imp_name
, prefix
, prefixlen
);
12905 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
12907 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
12908 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
12909 SET_SYMBOL_REF_DECL (rtl
, to
);
12910 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
12912 rtl
= gen_const_mem (Pmode
, rtl
);
12913 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12915 SET_DECL_RTL (to
, rtl
);
12916 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12921 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12922 true if we require the result be a register. */
12925 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
12930 gcc_assert (SYMBOL_REF_DECL (symbol
));
12931 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
12933 x
= DECL_RTL (imp_decl
);
12935 x
= force_reg (Pmode
, x
);
12939 /* Try machine-dependent ways of modifying an illegitimate address
12940 to be legitimate. If we find one, return the new, valid address.
12941 This macro is used in only one place: `memory_address' in explow.c.
12943 OLDX is the address as it was before break_out_memory_refs was called.
12944 In some cases it is useful to look at this to decide what needs to be done.
12946 It is always safe for this macro to do nothing. It exists to recognize
12947 opportunities to optimize the output.
12949 For the 80386, we handle X+REG by loading X into a register R and
12950 using R+REG. R will go in a general reg and indexing will be used.
12951 However, if REG is a broken-out memory address or multiplication,
12952 nothing needs to be done because REG can certainly go in a general reg.
12954 When -fpic is used, special handling is needed for symbolic references.
12955 See comments by legitimize_pic_address in i386.c for details. */
12958 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
12959 enum machine_mode mode
)
12964 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
12966 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
12967 if (GET_CODE (x
) == CONST
12968 && GET_CODE (XEXP (x
, 0)) == PLUS
12969 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12970 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
12972 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
12973 (enum tls_model
) log
, false);
12974 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12977 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12979 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
12980 return legitimize_dllimport_symbol (x
, true);
12981 if (GET_CODE (x
) == CONST
12982 && GET_CODE (XEXP (x
, 0)) == PLUS
12983 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12984 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
12986 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
12987 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12991 if (flag_pic
&& SYMBOLIC_CONST (x
))
12992 return legitimize_pic_address (x
, 0);
12995 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
12996 return machopic_indirect_data_reference (x
, 0);
12999 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13000 if (GET_CODE (x
) == ASHIFT
13001 && CONST_INT_P (XEXP (x
, 1))
13002 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13005 log
= INTVAL (XEXP (x
, 1));
13006 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13007 GEN_INT (1 << log
));
13010 if (GET_CODE (x
) == PLUS
)
13012 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13014 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13015 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13016 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13019 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13020 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13021 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13022 GEN_INT (1 << log
));
13025 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13026 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13027 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13030 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13031 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13032 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13033 GEN_INT (1 << log
));
13036 /* Put multiply first if it isn't already. */
13037 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13039 rtx tmp
= XEXP (x
, 0);
13040 XEXP (x
, 0) = XEXP (x
, 1);
13045 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13046 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13047 created by virtual register instantiation, register elimination, and
13048 similar optimizations. */
13049 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13052 x
= gen_rtx_PLUS (Pmode
,
13053 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13054 XEXP (XEXP (x
, 1), 0)),
13055 XEXP (XEXP (x
, 1), 1));
13059 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13060 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13061 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13062 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13063 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13064 && CONSTANT_P (XEXP (x
, 1)))
13067 rtx other
= NULL_RTX
;
13069 if (CONST_INT_P (XEXP (x
, 1)))
13071 constant
= XEXP (x
, 1);
13072 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13074 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13076 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13077 other
= XEXP (x
, 1);
13085 x
= gen_rtx_PLUS (Pmode
,
13086 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13087 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13088 plus_constant (Pmode
, other
,
13089 INTVAL (constant
)));
13093 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13096 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13099 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13102 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13105 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13109 && REG_P (XEXP (x
, 1))
13110 && REG_P (XEXP (x
, 0)))
13113 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13116 x
= legitimize_pic_address (x
, 0);
13119 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13122 if (REG_P (XEXP (x
, 0)))
13124 rtx temp
= gen_reg_rtx (Pmode
);
13125 rtx val
= force_operand (XEXP (x
, 1), temp
);
13128 if (GET_MODE (val
) != Pmode
)
13129 val
= convert_to_mode (Pmode
, val
, 1);
13130 emit_move_insn (temp
, val
);
13133 XEXP (x
, 1) = temp
;
13137 else if (REG_P (XEXP (x
, 1)))
13139 rtx temp
= gen_reg_rtx (Pmode
);
13140 rtx val
= force_operand (XEXP (x
, 0), temp
);
13143 if (GET_MODE (val
) != Pmode
)
13144 val
= convert_to_mode (Pmode
, val
, 1);
13145 emit_move_insn (temp
, val
);
13148 XEXP (x
, 0) = temp
;
13156 /* Print an integer constant expression in assembler syntax. Addition
13157 and subtraction are the only arithmetic that may appear in these
13158 expressions. FILE is the stdio stream to write to, X is the rtx, and
13159 CODE is the operand print code from the output string. */
13162 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13166 switch (GET_CODE (x
))
13169 gcc_assert (flag_pic
);
13174 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13175 output_addr_const (file
, x
);
13178 const char *name
= XSTR (x
, 0);
13180 /* Mark the decl as referenced so that cgraph will
13181 output the function. */
13182 if (SYMBOL_REF_DECL (x
))
13183 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13186 if (MACHOPIC_INDIRECT
13187 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13188 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13190 assemble_name (file
, name
);
13192 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13193 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13194 fputs ("@PLT", file
);
13201 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13202 assemble_name (asm_out_file
, buf
);
13206 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13210 /* This used to output parentheses around the expression,
13211 but that does not work on the 386 (either ATT or BSD assembler). */
13212 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13216 if (GET_MODE (x
) == VOIDmode
)
13218 /* We can use %d if the number is <32 bits and positive. */
13219 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13220 fprintf (file
, "0x%lx%08lx",
13221 (unsigned long) CONST_DOUBLE_HIGH (x
),
13222 (unsigned long) CONST_DOUBLE_LOW (x
));
13224 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13227 /* We can't handle floating point constants;
13228 TARGET_PRINT_OPERAND must handle them. */
13229 output_operand_lossage ("floating constant misused");
13233 /* Some assemblers need integer constants to appear first. */
13234 if (CONST_INT_P (XEXP (x
, 0)))
13236 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13238 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13242 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13243 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13245 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13251 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13252 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13254 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13256 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13260 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13262 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13267 gcc_assert (XVECLEN (x
, 0) == 1);
13268 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13269 switch (XINT (x
, 1))
13272 fputs ("@GOT", file
);
13274 case UNSPEC_GOTOFF
:
13275 fputs ("@GOTOFF", file
);
13277 case UNSPEC_PLTOFF
:
13278 fputs ("@PLTOFF", file
);
13281 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13282 "(%rip)" : "[rip]", file
);
13284 case UNSPEC_GOTPCREL
:
13285 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13286 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13288 case UNSPEC_GOTTPOFF
:
13289 /* FIXME: This might be @TPOFF in Sun ld too. */
13290 fputs ("@gottpoff", file
);
13293 fputs ("@tpoff", file
);
13295 case UNSPEC_NTPOFF
:
13297 fputs ("@tpoff", file
);
13299 fputs ("@ntpoff", file
);
13301 case UNSPEC_DTPOFF
:
13302 fputs ("@dtpoff", file
);
13304 case UNSPEC_GOTNTPOFF
:
13306 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13307 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13309 fputs ("@gotntpoff", file
);
13311 case UNSPEC_INDNTPOFF
:
13312 fputs ("@indntpoff", file
);
13315 case UNSPEC_MACHOPIC_OFFSET
:
13317 machopic_output_function_base_name (file
);
13321 output_operand_lossage ("invalid UNSPEC as operand");
13327 output_operand_lossage ("invalid expression as operand");
13331 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13332 We need to emit DTP-relative relocations. */
13334 static void ATTRIBUTE_UNUSED
13335 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13337 fputs (ASM_LONG
, file
);
13338 output_addr_const (file
, x
);
13339 fputs ("@dtpoff", file
);
13345 fputs (", 0", file
);
13348 gcc_unreachable ();
13352 /* Return true if X is a representation of the PIC register. This copes
13353 with calls from ix86_find_base_term, where the register might have
13354 been replaced by a cselib value. */
13357 ix86_pic_register_p (rtx x
)
13359 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13360 return (pic_offset_table_rtx
13361 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13363 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13366 /* Helper function for ix86_delegitimize_address.
13367 Attempt to delegitimize TLS local-exec accesses. */
13370 ix86_delegitimize_tls_address (rtx orig_x
)
13372 rtx x
= orig_x
, unspec
;
13373 struct ix86_address addr
;
13375 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13379 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13381 if (ix86_decompose_address (x
, &addr
) == 0
13382 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13383 || addr
.disp
== NULL_RTX
13384 || GET_CODE (addr
.disp
) != CONST
)
13386 unspec
= XEXP (addr
.disp
, 0);
13387 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13388 unspec
= XEXP (unspec
, 0);
13389 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13391 x
= XVECEXP (unspec
, 0, 0);
13392 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13393 if (unspec
!= XEXP (addr
.disp
, 0))
13394 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13397 rtx idx
= addr
.index
;
13398 if (addr
.scale
!= 1)
13399 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13400 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13403 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13404 if (MEM_P (orig_x
))
13405 x
= replace_equiv_address_nv (orig_x
, x
);
13409 /* In the name of slightly smaller debug output, and to cater to
13410 general assembler lossage, recognize PIC+GOTOFF and turn it back
13411 into a direct symbol reference.
13413 On Darwin, this is necessary to avoid a crash, because Darwin
13414 has a different PIC label for each routine but the DWARF debugging
13415 information is not associated with any particular routine, so it's
13416 necessary to remove references to the PIC label from RTL stored by
13417 the DWARF output code. */
13420 ix86_delegitimize_address (rtx x
)
13422 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13423 /* addend is NULL or some rtx if x is something+GOTOFF where
13424 something doesn't include the PIC register. */
13425 rtx addend
= NULL_RTX
;
13426 /* reg_addend is NULL or a multiple of some register. */
13427 rtx reg_addend
= NULL_RTX
;
13428 /* const_addend is NULL or a const_int. */
13429 rtx const_addend
= NULL_RTX
;
13430 /* This is the result, or NULL. */
13431 rtx result
= NULL_RTX
;
13440 if (GET_CODE (x
) == CONST
13441 && GET_CODE (XEXP (x
, 0)) == PLUS
13442 && GET_MODE (XEXP (x
, 0)) == Pmode
13443 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13444 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13445 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13447 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13448 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13449 if (MEM_P (orig_x
))
13450 x
= replace_equiv_address_nv (orig_x
, x
);
13453 if (GET_CODE (x
) != CONST
13454 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13455 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13456 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13457 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13458 return ix86_delegitimize_tls_address (orig_x
);
13459 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13460 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13462 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13470 if (GET_CODE (x
) != PLUS
13471 || GET_CODE (XEXP (x
, 1)) != CONST
)
13472 return ix86_delegitimize_tls_address (orig_x
);
13474 if (ix86_pic_register_p (XEXP (x
, 0)))
13475 /* %ebx + GOT/GOTOFF */
13477 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13479 /* %ebx + %reg * scale + GOT/GOTOFF */
13480 reg_addend
= XEXP (x
, 0);
13481 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13482 reg_addend
= XEXP (reg_addend
, 1);
13483 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13484 reg_addend
= XEXP (reg_addend
, 0);
13487 reg_addend
= NULL_RTX
;
13488 addend
= XEXP (x
, 0);
13492 addend
= XEXP (x
, 0);
13494 x
= XEXP (XEXP (x
, 1), 0);
13495 if (GET_CODE (x
) == PLUS
13496 && CONST_INT_P (XEXP (x
, 1)))
13498 const_addend
= XEXP (x
, 1);
13502 if (GET_CODE (x
) == UNSPEC
13503 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13504 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13505 result
= XVECEXP (x
, 0, 0);
13507 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13508 && !MEM_P (orig_x
))
13509 result
= XVECEXP (x
, 0, 0);
13512 return ix86_delegitimize_tls_address (orig_x
);
13515 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13517 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13520 /* If the rest of original X doesn't involve the PIC register, add
13521 addend and subtract pic_offset_table_rtx. This can happen e.g.
13523 leal (%ebx, %ecx, 4), %ecx
13525 movl foo@GOTOFF(%ecx), %edx
13526 in which case we return (%ecx - %ebx) + foo. */
13527 if (pic_offset_table_rtx
)
13528 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13529 pic_offset_table_rtx
),
13534 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13536 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13537 if (result
== NULL_RTX
)
13543 /* If X is a machine specific address (i.e. a symbol or label being
13544 referenced as a displacement from the GOT implemented using an
13545 UNSPEC), then return the base term. Otherwise return X. */
13548 ix86_find_base_term (rtx x
)
13554 if (GET_CODE (x
) != CONST
)
13556 term
= XEXP (x
, 0);
13557 if (GET_CODE (term
) == PLUS
13558 && (CONST_INT_P (XEXP (term
, 1))
13559 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13560 term
= XEXP (term
, 0);
13561 if (GET_CODE (term
) != UNSPEC
13562 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13563 && XINT (term
, 1) != UNSPEC_PCREL
))
13566 return XVECEXP (term
, 0, 0);
13569 return ix86_delegitimize_address (x
);
13573 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13574 bool fp
, FILE *file
)
13576 const char *suffix
;
13578 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13580 code
= ix86_fp_compare_code_to_integer (code
);
13584 code
= reverse_condition (code
);
13635 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13639 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13640 Those same assemblers have the same but opposite lossage on cmov. */
13641 if (mode
== CCmode
)
13642 suffix
= fp
? "nbe" : "a";
13643 else if (mode
== CCCmode
)
13646 gcc_unreachable ();
13662 gcc_unreachable ();
13666 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13683 gcc_unreachable ();
13687 /* ??? As above. */
13688 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13689 suffix
= fp
? "nb" : "ae";
13692 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13696 /* ??? As above. */
13697 if (mode
== CCmode
)
13699 else if (mode
== CCCmode
)
13700 suffix
= fp
? "nb" : "ae";
13702 gcc_unreachable ();
13705 suffix
= fp
? "u" : "p";
13708 suffix
= fp
? "nu" : "np";
13711 gcc_unreachable ();
13713 fputs (suffix
, file
);
13716 /* Print the name of register X to FILE based on its machine mode and number.
13717 If CODE is 'w', pretend the mode is HImode.
13718 If CODE is 'b', pretend the mode is QImode.
13719 If CODE is 'k', pretend the mode is SImode.
13720 If CODE is 'q', pretend the mode is DImode.
13721 If CODE is 'x', pretend the mode is V4SFmode.
13722 If CODE is 't', pretend the mode is V8SFmode.
13723 If CODE is 'h', pretend the reg is the 'high' byte register.
13724 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13725 If CODE is 'd', duplicate the operand for AVX instruction.
13729 print_reg (rtx x
, int code
, FILE *file
)
13732 unsigned int regno
;
13733 bool duplicated
= code
== 'd' && TARGET_AVX
;
13735 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13740 gcc_assert (TARGET_64BIT
);
13741 fputs ("rip", file
);
13745 regno
= true_regnum (x
);
13746 gcc_assert (regno
!= ARG_POINTER_REGNUM
13747 && regno
!= FRAME_POINTER_REGNUM
13748 && regno
!= FLAGS_REG
13749 && regno
!= FPSR_REG
13750 && regno
!= FPCR_REG
);
13752 if (code
== 'w' || MMX_REG_P (x
))
13754 else if (code
== 'b')
13756 else if (code
== 'k')
13758 else if (code
== 'q')
13760 else if (code
== 'y')
13762 else if (code
== 'h')
13764 else if (code
== 'x')
13766 else if (code
== 't')
13769 code
= GET_MODE_SIZE (GET_MODE (x
));
13771 /* Irritatingly, AMD extended registers use different naming convention
13772 from the normal registers: "r%d[bwd]" */
13773 if (REX_INT_REGNO_P (regno
))
13775 gcc_assert (TARGET_64BIT
);
13777 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
13781 error ("extended registers have no high halves");
13796 error ("unsupported operand size for extended register");
13806 if (STACK_TOP_P (x
))
13815 if (! ANY_FP_REG_P (x
))
13816 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13821 reg
= hi_reg_name
[regno
];
13824 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13826 reg
= qi_reg_name
[regno
];
13829 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13831 reg
= qi_high_reg_name
[regno
];
13836 gcc_assert (!duplicated
);
13838 fputs (hi_reg_name
[regno
] + 1, file
);
13843 gcc_unreachable ();
13849 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13850 fprintf (file
, ", %%%s", reg
);
13852 fprintf (file
, ", %s", reg
);
13856 /* Locate some local-dynamic symbol still in use by this function
13857 so that we can print its name in some tls_local_dynamic_base
13861 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13865 if (GET_CODE (x
) == SYMBOL_REF
13866 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13868 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
13875 static const char *
13876 get_some_local_dynamic_name (void)
13880 if (cfun
->machine
->some_ld_name
)
13881 return cfun
->machine
->some_ld_name
;
13883 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13884 if (NONDEBUG_INSN_P (insn
)
13885 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13886 return cfun
->machine
->some_ld_name
;
13891 /* Meaning of CODE:
13892 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13893 C -- print opcode suffix for set/cmov insn.
13894 c -- like C, but print reversed condition
13895 F,f -- likewise, but for floating-point.
13896 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13898 R -- print the prefix for register names.
13899 z -- print the opcode suffix for the size of the current operand.
13900 Z -- likewise, with special suffixes for x87 instructions.
13901 * -- print a star (in certain assembler syntax)
13902 A -- print an absolute memory reference.
13903 E -- print address with DImode register names if TARGET_64BIT.
13904 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13905 s -- print a shift double count, followed by the assemblers argument
13907 b -- print the QImode name of the register for the indicated operand.
13908 %b0 would print %al if operands[0] is reg 0.
13909 w -- likewise, print the HImode name of the register.
13910 k -- likewise, print the SImode name of the register.
13911 q -- likewise, print the DImode name of the register.
13912 x -- likewise, print the V4SFmode name of the register.
13913 t -- likewise, print the V8SFmode name of the register.
13914 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13915 y -- print "st(0)" instead of "st" as a register.
13916 d -- print duplicated register operand for AVX instruction.
13917 D -- print condition for SSE cmp instruction.
13918 P -- if PIC, print an @PLT suffix.
13919 p -- print raw symbol name.
13920 X -- don't print any sort of PIC '@' suffix for a symbol.
13921 & -- print some in-use local-dynamic symbol name.
13922 H -- print a memory address offset by 8; used for sse high-parts
13923 Y -- print condition for XOP pcom* instruction.
13924 + -- print a branch hint as 'cs' or 'ds' prefix
13925 ; -- print a semicolon (after prefixes due to bug in older gas).
13926 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13927 @ -- print a segment register of thread base pointer load
13928 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13932 ix86_print_operand (FILE *file
, rtx x
, int code
)
13939 switch (ASSEMBLER_DIALECT
)
13946 /* Intel syntax. For absolute addresses, registers should not
13947 be surrounded by braces. */
13951 ix86_print_operand (file
, x
, 0);
13958 gcc_unreachable ();
13961 ix86_print_operand (file
, x
, 0);
13965 /* Wrap address in an UNSPEC to declare special handling. */
13967 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
13969 output_address (x
);
13973 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13978 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13983 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13988 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13993 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13998 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14003 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14004 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14007 switch (GET_MODE_SIZE (GET_MODE (x
)))
14022 output_operand_lossage
14023 ("invalid operand size for operand code 'O'");
14032 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14034 /* Opcodes don't get size suffixes if using Intel opcodes. */
14035 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14038 switch (GET_MODE_SIZE (GET_MODE (x
)))
14057 output_operand_lossage
14058 ("invalid operand size for operand code 'z'");
14063 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14065 (0, "non-integer operand used with operand code 'z'");
14069 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14070 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14073 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14075 switch (GET_MODE_SIZE (GET_MODE (x
)))
14078 #ifdef HAVE_AS_IX86_FILDS
14088 #ifdef HAVE_AS_IX86_FILDQ
14091 fputs ("ll", file
);
14099 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14101 /* 387 opcodes don't get size suffixes
14102 if the operands are registers. */
14103 if (STACK_REG_P (x
))
14106 switch (GET_MODE_SIZE (GET_MODE (x
)))
14127 output_operand_lossage
14128 ("invalid operand type used with operand code 'Z'");
14132 output_operand_lossage
14133 ("invalid operand size for operand code 'Z'");
14151 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14153 ix86_print_operand (file
, x
, 0);
14154 fputs (", ", file
);
14159 switch (GET_CODE (x
))
14162 fputs ("neq", file
);
14165 fputs ("eq", file
);
14169 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14173 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14177 fputs ("le", file
);
14181 fputs ("lt", file
);
14184 fputs ("unord", file
);
14187 fputs ("ord", file
);
14190 fputs ("ueq", file
);
14193 fputs ("nlt", file
);
14196 fputs ("nle", file
);
14199 fputs ("ule", file
);
14202 fputs ("ult", file
);
14205 fputs ("une", file
);
14208 output_operand_lossage ("operand is not a condition code, "
14209 "invalid operand code 'Y'");
14215 /* Little bit of braindamage here. The SSE compare instructions
14216 does use completely different names for the comparisons that the
14217 fp conditional moves. */
14218 switch (GET_CODE (x
))
14223 fputs ("eq_us", file
);
14227 fputs ("eq", file
);
14232 fputs ("nge", file
);
14236 fputs ("lt", file
);
14241 fputs ("ngt", file
);
14245 fputs ("le", file
);
14248 fputs ("unord", file
);
14253 fputs ("neq_oq", file
);
14257 fputs ("neq", file
);
14262 fputs ("ge", file
);
14266 fputs ("nlt", file
);
14271 fputs ("gt", file
);
14275 fputs ("nle", file
);
14278 fputs ("ord", file
);
14281 output_operand_lossage ("operand is not a condition code, "
14282 "invalid operand code 'D'");
14289 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14290 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14296 if (!COMPARISON_P (x
))
14298 output_operand_lossage ("operand is not a condition code, "
14299 "invalid operand code '%c'", code
);
14302 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14303 code
== 'c' || code
== 'f',
14304 code
== 'F' || code
== 'f',
14309 if (!offsettable_memref_p (x
))
14311 output_operand_lossage ("operand is not an offsettable memory "
14312 "reference, invalid operand code 'H'");
14315 /* It doesn't actually matter what mode we use here, as we're
14316 only going to use this for printing. */
14317 x
= adjust_address_nv (x
, DImode
, 8);
14321 gcc_assert (CONST_INT_P (x
));
14323 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14324 #ifdef HAVE_AS_IX86_HLE
14325 fputs ("xacquire ", file
);
14327 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14329 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14330 #ifdef HAVE_AS_IX86_HLE
14331 fputs ("xrelease ", file
);
14333 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14335 /* We do not want to print value of the operand. */
14339 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14345 const char *name
= get_some_local_dynamic_name ();
14347 output_operand_lossage ("'%%&' used without any "
14348 "local dynamic TLS references");
14350 assemble_name (file
, name
);
14359 || optimize_function_for_size_p (cfun
)
14360 || !TARGET_BRANCH_PREDICTION_HINTS
)
14363 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14366 int pred_val
= INTVAL (XEXP (x
, 0));
14368 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14369 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14371 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14373 = final_forward_branch_p (current_output_insn
) == 0;
14375 /* Emit hints only in the case default branch prediction
14376 heuristics would fail. */
14377 if (taken
!= cputaken
)
14379 /* We use 3e (DS) prefix for taken branches and
14380 2e (CS) prefix for not taken branches. */
14382 fputs ("ds ; ", file
);
14384 fputs ("cs ; ", file
);
14392 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14398 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14401 /* The kernel uses a different segment register for performance
14402 reasons; a system call would not have to trash the userspace
14403 segment register, which would be expensive. */
14404 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14405 fputs ("fs", file
);
14407 fputs ("gs", file
);
14411 putc (TARGET_AVX2
? 'i' : 'f', file
);
14415 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14416 fputs ("addr32 ", file
);
14420 output_operand_lossage ("invalid operand code '%c'", code
);
14425 print_reg (x
, code
, file
);
14427 else if (MEM_P (x
))
14429 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14430 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14431 && GET_MODE (x
) != BLKmode
)
14434 switch (GET_MODE_SIZE (GET_MODE (x
)))
14436 case 1: size
= "BYTE"; break;
14437 case 2: size
= "WORD"; break;
14438 case 4: size
= "DWORD"; break;
14439 case 8: size
= "QWORD"; break;
14440 case 12: size
= "TBYTE"; break;
14442 if (GET_MODE (x
) == XFmode
)
14447 case 32: size
= "YMMWORD"; break;
14449 gcc_unreachable ();
14452 /* Check for explicit size override (codes 'b', 'w', 'k',
14456 else if (code
== 'w')
14458 else if (code
== 'k')
14460 else if (code
== 'q')
14462 else if (code
== 'x')
14465 fputs (size
, file
);
14466 fputs (" PTR ", file
);
14470 /* Avoid (%rip) for call operands. */
14471 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14472 && !CONST_INT_P (x
))
14473 output_addr_const (file
, x
);
14474 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14475 output_operand_lossage ("invalid constraints for operand");
14477 output_address (x
);
14480 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14485 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14486 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14488 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14490 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14492 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14494 fprintf (file
, "0x%08x", (unsigned int) l
);
14497 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14502 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14503 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14505 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14507 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14510 /* These float cases don't actually occur as immediate operands. */
14511 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14515 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14516 fputs (dstr
, file
);
14521 /* We have patterns that allow zero sets of memory, for instance.
14522 In 64-bit mode, we should probably support all 8-byte vectors,
14523 since we can in fact encode that into an immediate. */
14524 if (GET_CODE (x
) == CONST_VECTOR
)
14526 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14530 if (code
!= 'P' && code
!= 'p')
14532 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14534 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14537 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14538 || GET_CODE (x
) == LABEL_REF
)
14540 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14543 fputs ("OFFSET FLAT:", file
);
14546 if (CONST_INT_P (x
))
14547 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14548 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14549 output_pic_addr_const (file
, x
, code
);
14551 output_addr_const (file
, x
);
14556 ix86_print_operand_punct_valid_p (unsigned char code
)
14558 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14559 || code
== ';' || code
== '~' || code
== '^');
14562 /* Print a memory operand whose address is ADDR. */
14565 ix86_print_operand_address (FILE *file
, rtx addr
)
14567 struct ix86_address parts
;
14568 rtx base
, index
, disp
;
14574 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14576 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14577 gcc_assert (parts
.index
== NULL_RTX
);
14578 parts
.index
= XVECEXP (addr
, 0, 1);
14579 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14580 addr
= XVECEXP (addr
, 0, 0);
14583 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14585 gcc_assert (TARGET_64BIT
);
14586 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14590 ok
= ix86_decompose_address (addr
, &parts
);
14595 index
= parts
.index
;
14597 scale
= parts
.scale
;
14605 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14607 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14610 gcc_unreachable ();
14613 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14614 if (TARGET_64BIT
&& !base
&& !index
)
14618 if (GET_CODE (disp
) == CONST
14619 && GET_CODE (XEXP (disp
, 0)) == PLUS
14620 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14621 symbol
= XEXP (XEXP (disp
, 0), 0);
14623 if (GET_CODE (symbol
) == LABEL_REF
14624 || (GET_CODE (symbol
) == SYMBOL_REF
14625 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14628 if (!base
&& !index
)
14630 /* Displacement only requires special attention. */
14632 if (CONST_INT_P (disp
))
14634 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14635 fputs ("ds:", file
);
14636 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14639 output_pic_addr_const (file
, disp
, 0);
14641 output_addr_const (file
, disp
);
14645 /* Print SImode register names to force addr32 prefix. */
14646 if (SImode_address_operand (addr
, VOIDmode
))
14648 #ifdef ENABLE_CHECKING
14649 gcc_assert (TARGET_64BIT
);
14650 switch (GET_CODE (addr
))
14653 gcc_assert (GET_MODE (addr
) == SImode
);
14654 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14658 gcc_assert (GET_MODE (addr
) == DImode
);
14661 gcc_unreachable ();
14664 gcc_assert (!code
);
14670 && CONST_INT_P (disp
)
14671 && INTVAL (disp
) < -16*1024*1024)
14673 /* X32 runs in 64-bit mode, where displacement, DISP, in
14674 address DISP(%r64), is encoded as 32-bit immediate sign-
14675 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14676 address is %r64 + 0xffffffffbffffd00. When %r64 <
14677 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14678 which is invalid for x32. The correct address is %r64
14679 - 0x40000300 == 0xf7ffdd64. To properly encode
14680 -0x40000300(%r64) for x32, we zero-extend negative
14681 displacement by forcing addr32 prefix which truncates
14682 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14683 zero-extend all negative displacements, including -1(%rsp).
14684 However, for small negative displacements, sign-extension
14685 won't cause overflow. We only zero-extend negative
14686 displacements if they < -16*1024*1024, which is also used
14687 to check legitimate address displacements for PIC. */
14691 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14696 output_pic_addr_const (file
, disp
, 0);
14697 else if (GET_CODE (disp
) == LABEL_REF
)
14698 output_asm_label (disp
);
14700 output_addr_const (file
, disp
);
14705 print_reg (base
, code
, file
);
14709 print_reg (index
, vsib
? 0 : code
, file
);
14710 if (scale
!= 1 || vsib
)
14711 fprintf (file
, ",%d", scale
);
14717 rtx offset
= NULL_RTX
;
14721 /* Pull out the offset of a symbol; print any symbol itself. */
14722 if (GET_CODE (disp
) == CONST
14723 && GET_CODE (XEXP (disp
, 0)) == PLUS
14724 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14726 offset
= XEXP (XEXP (disp
, 0), 1);
14727 disp
= gen_rtx_CONST (VOIDmode
,
14728 XEXP (XEXP (disp
, 0), 0));
14732 output_pic_addr_const (file
, disp
, 0);
14733 else if (GET_CODE (disp
) == LABEL_REF
)
14734 output_asm_label (disp
);
14735 else if (CONST_INT_P (disp
))
14738 output_addr_const (file
, disp
);
14744 print_reg (base
, code
, file
);
14747 if (INTVAL (offset
) >= 0)
14749 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14753 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14760 print_reg (index
, vsib
? 0 : code
, file
);
14761 if (scale
!= 1 || vsib
)
14762 fprintf (file
, "*%d", scale
);
14769 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14772 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14776 if (GET_CODE (x
) != UNSPEC
)
14779 op
= XVECEXP (x
, 0, 0);
14780 switch (XINT (x
, 1))
14782 case UNSPEC_GOTTPOFF
:
14783 output_addr_const (file
, op
);
14784 /* FIXME: This might be @TPOFF in Sun ld. */
14785 fputs ("@gottpoff", file
);
14788 output_addr_const (file
, op
);
14789 fputs ("@tpoff", file
);
14791 case UNSPEC_NTPOFF
:
14792 output_addr_const (file
, op
);
14794 fputs ("@tpoff", file
);
14796 fputs ("@ntpoff", file
);
14798 case UNSPEC_DTPOFF
:
14799 output_addr_const (file
, op
);
14800 fputs ("@dtpoff", file
);
14802 case UNSPEC_GOTNTPOFF
:
14803 output_addr_const (file
, op
);
14805 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14806 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14808 fputs ("@gotntpoff", file
);
14810 case UNSPEC_INDNTPOFF
:
14811 output_addr_const (file
, op
);
14812 fputs ("@indntpoff", file
);
14815 case UNSPEC_MACHOPIC_OFFSET
:
14816 output_addr_const (file
, op
);
14818 machopic_output_function_base_name (file
);
14822 case UNSPEC_STACK_CHECK
:
14826 gcc_assert (flag_split_stack
);
14828 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14829 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14831 gcc_unreachable ();
14834 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14845 /* Split one or more double-mode RTL references into pairs of half-mode
14846 references. The RTL can be REG, offsettable MEM, integer constant, or
14847 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14848 split and "num" is its length. lo_half and hi_half are output arrays
14849 that parallel "operands". */
14852 split_double_mode (enum machine_mode mode
, rtx operands
[],
14853 int num
, rtx lo_half
[], rtx hi_half
[])
14855 enum machine_mode half_mode
;
14861 half_mode
= DImode
;
14864 half_mode
= SImode
;
14867 gcc_unreachable ();
14870 byte
= GET_MODE_SIZE (half_mode
);
14874 rtx op
= operands
[num
];
14876 /* simplify_subreg refuse to split volatile memory addresses,
14877 but we still have to handle it. */
14880 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14881 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14885 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14886 GET_MODE (op
) == VOIDmode
14887 ? mode
: GET_MODE (op
), 0);
14888 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14889 GET_MODE (op
) == VOIDmode
14890 ? mode
: GET_MODE (op
), byte
);
14895 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14896 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14897 is the expression of the binary operation. The output may either be
14898 emitted here, or returned to the caller, like all output_* functions.
14900 There is no guarantee that the operands are the same mode, as they
14901 might be within FLOAT or FLOAT_EXTEND expressions. */
14903 #ifndef SYSV386_COMPAT
14904 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14905 wants to fix the assemblers because that causes incompatibility
14906 with gcc. No-one wants to fix gcc because that causes
14907 incompatibility with assemblers... You can use the option of
14908 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14909 #define SYSV386_COMPAT 1
14913 output_387_binary_op (rtx insn
, rtx
*operands
)
14915 static char buf
[40];
14918 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
14920 #ifdef ENABLE_CHECKING
14921 /* Even if we do not want to check the inputs, this documents input
14922 constraints. Which helps in understanding the following code. */
14923 if (STACK_REG_P (operands
[0])
14924 && ((REG_P (operands
[1])
14925 && REGNO (operands
[0]) == REGNO (operands
[1])
14926 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14927 || (REG_P (operands
[2])
14928 && REGNO (operands
[0]) == REGNO (operands
[2])
14929 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14930 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14933 gcc_assert (is_sse
);
14936 switch (GET_CODE (operands
[3]))
14939 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14940 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14948 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14949 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14957 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14958 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14966 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14967 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14975 gcc_unreachable ();
14982 strcpy (buf
, ssep
);
14983 if (GET_MODE (operands
[0]) == SFmode
)
14984 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
14986 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
14990 strcpy (buf
, ssep
+ 1);
14991 if (GET_MODE (operands
[0]) == SFmode
)
14992 strcat (buf
, "ss\t{%2, %0|%0, %2}");
14994 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15000 switch (GET_CODE (operands
[3]))
15004 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15006 rtx temp
= operands
[2];
15007 operands
[2] = operands
[1];
15008 operands
[1] = temp
;
15011 /* know operands[0] == operands[1]. */
15013 if (MEM_P (operands
[2]))
15019 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15021 if (STACK_TOP_P (operands
[0]))
15022 /* How is it that we are storing to a dead operand[2]?
15023 Well, presumably operands[1] is dead too. We can't
15024 store the result to st(0) as st(0) gets popped on this
15025 instruction. Instead store to operands[2] (which I
15026 think has to be st(1)). st(1) will be popped later.
15027 gcc <= 2.8.1 didn't have this check and generated
15028 assembly code that the Unixware assembler rejected. */
15029 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15031 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15035 if (STACK_TOP_P (operands
[0]))
15036 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15038 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15043 if (MEM_P (operands
[1]))
15049 if (MEM_P (operands
[2]))
15055 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15058 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15059 derived assemblers, confusingly reverse the direction of
15060 the operation for fsub{r} and fdiv{r} when the
15061 destination register is not st(0). The Intel assembler
15062 doesn't have this brain damage. Read !SYSV386_COMPAT to
15063 figure out what the hardware really does. */
15064 if (STACK_TOP_P (operands
[0]))
15065 p
= "{p\t%0, %2|rp\t%2, %0}";
15067 p
= "{rp\t%2, %0|p\t%0, %2}";
15069 if (STACK_TOP_P (operands
[0]))
15070 /* As above for fmul/fadd, we can't store to st(0). */
15071 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15073 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15078 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15081 if (STACK_TOP_P (operands
[0]))
15082 p
= "{rp\t%0, %1|p\t%1, %0}";
15084 p
= "{p\t%1, %0|rp\t%0, %1}";
15086 if (STACK_TOP_P (operands
[0]))
15087 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15089 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15094 if (STACK_TOP_P (operands
[0]))
15096 if (STACK_TOP_P (operands
[1]))
15097 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15099 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15102 else if (STACK_TOP_P (operands
[1]))
15105 p
= "{\t%1, %0|r\t%0, %1}";
15107 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15113 p
= "{r\t%2, %0|\t%0, %2}";
15115 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15121 gcc_unreachable ();
15128 /* Check if a 256bit AVX register is referenced inside of EXP. */
15131 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15135 if (GET_CODE (exp
) == SUBREG
)
15136 exp
= SUBREG_REG (exp
);
15139 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15145 /* Return needed mode for entity in optimize_mode_switching pass. */
15148 ix86_avx_u128_mode_needed (rtx insn
)
15154 /* Needed mode is set to AVX_U128_CLEAN if there are
15155 no 256bit modes used in function arguments. */
15156 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15158 link
= XEXP (link
, 1))
15160 if (GET_CODE (XEXP (link
, 0)) == USE
)
15162 rtx arg
= XEXP (XEXP (link
, 0), 0);
15164 if (ix86_check_avx256_register (&arg
, NULL
))
15165 return AVX_U128_ANY
;
15169 return AVX_U128_CLEAN
;
15172 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15173 changes state only when a 256bit register is written to, but we need
15174 to prevent the compiler from moving optimal insertion point above
15175 eventual read from 256bit register. */
15176 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15177 return AVX_U128_DIRTY
;
15179 return AVX_U128_ANY
;
15182 /* Return mode that i387 must be switched into
15183 prior to the execution of insn. */
15186 ix86_i387_mode_needed (int entity
, rtx insn
)
15188 enum attr_i387_cw mode
;
15190 /* The mode UNINITIALIZED is used to store control word after a
15191 function call or ASM pattern. The mode ANY specify that function
15192 has no requirements on the control word and make no changes in the
15193 bits we are interested in. */
15196 || (NONJUMP_INSN_P (insn
)
15197 && (asm_noperands (PATTERN (insn
)) >= 0
15198 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15199 return I387_CW_UNINITIALIZED
;
15201 if (recog_memoized (insn
) < 0)
15202 return I387_CW_ANY
;
15204 mode
= get_attr_i387_cw (insn
);
15209 if (mode
== I387_CW_TRUNC
)
15214 if (mode
== I387_CW_FLOOR
)
15219 if (mode
== I387_CW_CEIL
)
15224 if (mode
== I387_CW_MASK_PM
)
15229 gcc_unreachable ();
15232 return I387_CW_ANY
;
15235 /* Return mode that entity must be switched into
15236 prior to the execution of insn. */
15239 ix86_mode_needed (int entity
, rtx insn
)
15244 return ix86_avx_u128_mode_needed (insn
);
15249 return ix86_i387_mode_needed (entity
, insn
);
15251 gcc_unreachable ();
15256 /* Check if a 256bit AVX register is referenced in stores. */
15259 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15261 if (ix86_check_avx256_register (&dest
, NULL
))
15263 bool *used
= (bool *) data
;
15268 /* Calculate mode of upper 128bit AVX registers after the insn. */
15271 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15273 rtx pat
= PATTERN (insn
);
15275 if (vzeroupper_operation (pat
, VOIDmode
)
15276 || vzeroall_operation (pat
, VOIDmode
))
15277 return AVX_U128_CLEAN
;
15279 /* We know that state is clean after CALL insn if there are no
15280 256bit registers used in the function return register. */
15283 bool avx_reg256_found
= false;
15284 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15285 if (!avx_reg256_found
)
15286 return AVX_U128_CLEAN
;
15289 /* Otherwise, return current mode. Remember that if insn
15290 references AVX 256bit registers, the mode was already changed
15291 to DIRTY from MODE_NEEDED. */
15295 /* Return the mode that an insn results in. */
15298 ix86_mode_after (int entity
, int mode
, rtx insn
)
15303 return ix86_avx_u128_mode_after (mode
, insn
);
15310 gcc_unreachable ();
15315 ix86_avx_u128_mode_entry (void)
15319 /* Entry mode is set to AVX_U128_DIRTY if there are
15320 256bit modes used in function arguments. */
15321 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15322 arg
= TREE_CHAIN (arg
))
15324 rtx incoming
= DECL_INCOMING_RTL (arg
);
15326 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15327 return AVX_U128_DIRTY
;
15330 return AVX_U128_CLEAN
;
15333 /* Return a mode that ENTITY is assumed to be
15334 switched to at function entry. */
15337 ix86_mode_entry (int entity
)
15342 return ix86_avx_u128_mode_entry ();
15347 return I387_CW_ANY
;
15349 gcc_unreachable ();
15354 ix86_avx_u128_mode_exit (void)
15356 rtx reg
= crtl
->return_rtx
;
15358 /* Exit mode is set to AVX_U128_DIRTY if there are
15359 256bit modes used in the function return register. */
15360 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15361 return AVX_U128_DIRTY
;
15363 return AVX_U128_CLEAN
;
15366 /* Return a mode that ENTITY is assumed to be
15367 switched to at function exit. */
15370 ix86_mode_exit (int entity
)
15375 return ix86_avx_u128_mode_exit ();
15380 return I387_CW_ANY
;
15382 gcc_unreachable ();
15386 /* Output code to initialize control word copies used by trunc?f?i and
15387 rounding patterns. CURRENT_MODE is set to current control word,
15388 while NEW_MODE is set to new control word. */
15391 emit_i387_cw_initialization (int mode
)
15393 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15396 enum ix86_stack_slot slot
;
15398 rtx reg
= gen_reg_rtx (HImode
);
15400 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15401 emit_move_insn (reg
, copy_rtx (stored_mode
));
15403 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15404 || optimize_function_for_size_p (cfun
))
15408 case I387_CW_TRUNC
:
15409 /* round toward zero (truncate) */
15410 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15411 slot
= SLOT_CW_TRUNC
;
15414 case I387_CW_FLOOR
:
15415 /* round down toward -oo */
15416 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15417 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15418 slot
= SLOT_CW_FLOOR
;
15422 /* round up toward +oo */
15423 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15424 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15425 slot
= SLOT_CW_CEIL
;
15428 case I387_CW_MASK_PM
:
15429 /* mask precision exception for nearbyint() */
15430 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15431 slot
= SLOT_CW_MASK_PM
;
15435 gcc_unreachable ();
15442 case I387_CW_TRUNC
:
15443 /* round toward zero (truncate) */
15444 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15445 slot
= SLOT_CW_TRUNC
;
15448 case I387_CW_FLOOR
:
15449 /* round down toward -oo */
15450 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15451 slot
= SLOT_CW_FLOOR
;
15455 /* round up toward +oo */
15456 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15457 slot
= SLOT_CW_CEIL
;
15460 case I387_CW_MASK_PM
:
15461 /* mask precision exception for nearbyint() */
15462 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15463 slot
= SLOT_CW_MASK_PM
;
15467 gcc_unreachable ();
15471 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15473 new_mode
= assign_386_stack_local (HImode
, slot
);
15474 emit_move_insn (new_mode
, reg
);
15477 /* Emit vzeroupper. */
15480 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15484 /* Cancel automatic vzeroupper insertion if there are
15485 live call-saved SSE registers at the insertion point. */
15487 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15488 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15492 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15493 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15496 emit_insn (gen_avx_vzeroupper ());
15499 /* Generate one or more insns to set ENTITY to MODE. */
15502 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15507 if (mode
== AVX_U128_CLEAN
)
15508 ix86_avx_emit_vzeroupper (regs_live
);
15514 if (mode
!= I387_CW_ANY
15515 && mode
!= I387_CW_UNINITIALIZED
)
15516 emit_i387_cw_initialization (mode
);
15519 gcc_unreachable ();
15523 /* Output code for INSN to convert a float to a signed int. OPERANDS
15524 are the insn operands. The output may be [HSD]Imode and the input
15525 operand may be [SDX]Fmode. */
15528 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15530 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15531 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15532 int round_mode
= get_attr_i387_cw (insn
);
15534 /* Jump through a hoop or two for DImode, since the hardware has no
15535 non-popping instruction. We used to do this a different way, but
15536 that was somewhat fragile and broke with post-reload splitters. */
15537 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15538 output_asm_insn ("fld\t%y1", operands
);
15540 gcc_assert (STACK_TOP_P (operands
[1]));
15541 gcc_assert (MEM_P (operands
[0]));
15542 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15545 output_asm_insn ("fisttp%Z0\t%0", operands
);
15548 if (round_mode
!= I387_CW_ANY
)
15549 output_asm_insn ("fldcw\t%3", operands
);
15550 if (stack_top_dies
|| dimode_p
)
15551 output_asm_insn ("fistp%Z0\t%0", operands
);
15553 output_asm_insn ("fist%Z0\t%0", operands
);
15554 if (round_mode
!= I387_CW_ANY
)
15555 output_asm_insn ("fldcw\t%2", operands
);
15561 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15562 have the values zero or one, indicates the ffreep insn's operand
15563 from the OPERANDS array. */
15565 static const char *
15566 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15568 if (TARGET_USE_FFREEP
)
15569 #ifdef HAVE_AS_IX86_FFREEP
15570 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15573 static char retval
[32];
15574 int regno
= REGNO (operands
[opno
]);
15576 gcc_assert (STACK_REGNO_P (regno
));
15578 regno
-= FIRST_STACK_REG
;
15580 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15585 return opno
? "fstp\t%y1" : "fstp\t%y0";
15589 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15590 should be used. UNORDERED_P is true when fucom should be used. */
15593 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15595 int stack_top_dies
;
15596 rtx cmp_op0
, cmp_op1
;
15597 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15601 cmp_op0
= operands
[0];
15602 cmp_op1
= operands
[1];
15606 cmp_op0
= operands
[1];
15607 cmp_op1
= operands
[2];
15612 if (GET_MODE (operands
[0]) == SFmode
)
15614 return "%vucomiss\t{%1, %0|%0, %1}";
15616 return "%vcomiss\t{%1, %0|%0, %1}";
15619 return "%vucomisd\t{%1, %0|%0, %1}";
15621 return "%vcomisd\t{%1, %0|%0, %1}";
15624 gcc_assert (STACK_TOP_P (cmp_op0
));
15626 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15628 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15630 if (stack_top_dies
)
15632 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15633 return output_387_ffreep (operands
, 1);
15636 return "ftst\n\tfnstsw\t%0";
15639 if (STACK_REG_P (cmp_op1
)
15641 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15642 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15644 /* If both the top of the 387 stack dies, and the other operand
15645 is also a stack register that dies, then this must be a
15646 `fcompp' float compare */
15650 /* There is no double popping fcomi variant. Fortunately,
15651 eflags is immune from the fstp's cc clobbering. */
15653 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15655 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15656 return output_387_ffreep (operands
, 0);
15661 return "fucompp\n\tfnstsw\t%0";
15663 return "fcompp\n\tfnstsw\t%0";
15668 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15670 static const char * const alt
[16] =
15672 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15673 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15674 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15675 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15677 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15678 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15682 "fcomi\t{%y1, %0|%0, %y1}",
15683 "fcomip\t{%y1, %0|%0, %y1}",
15684 "fucomi\t{%y1, %0|%0, %y1}",
15685 "fucomip\t{%y1, %0|%0, %y1}",
15696 mask
= eflags_p
<< 3;
15697 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15698 mask
|= unordered_p
<< 1;
15699 mask
|= stack_top_dies
;
15701 gcc_assert (mask
< 16);
15710 ix86_output_addr_vec_elt (FILE *file
, int value
)
15712 const char *directive
= ASM_LONG
;
15716 directive
= ASM_QUAD
;
15718 gcc_assert (!TARGET_64BIT
);
15721 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15725 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15727 const char *directive
= ASM_LONG
;
15730 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15731 directive
= ASM_QUAD
;
15733 gcc_assert (!TARGET_64BIT
);
15735 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15736 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15737 fprintf (file
, "%s%s%d-%s%d\n",
15738 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15739 else if (HAVE_AS_GOTOFF_IN_DATA
)
15740 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15742 else if (TARGET_MACHO
)
15744 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15745 machopic_output_function_base_name (file
);
15750 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15751 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15754 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15758 ix86_expand_clear (rtx dest
)
15762 /* We play register width games, which are only valid after reload. */
15763 gcc_assert (reload_completed
);
15765 /* Avoid HImode and its attendant prefix byte. */
15766 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15767 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15768 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15770 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15771 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15773 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15774 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15780 /* X is an unchanging MEM. If it is a constant pool reference, return
15781 the constant pool rtx, else NULL. */
15784 maybe_get_pool_constant (rtx x
)
15786 x
= ix86_delegitimize_address (XEXP (x
, 0));
15788 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15789 return get_pool_constant (x
);
15795 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15798 enum tls_model model
;
15803 if (GET_CODE (op1
) == SYMBOL_REF
)
15805 model
= SYMBOL_REF_TLS_MODEL (op1
);
15808 op1
= legitimize_tls_address (op1
, model
, true);
15809 op1
= force_operand (op1
, op0
);
15812 if (GET_MODE (op1
) != mode
)
15813 op1
= convert_to_mode (mode
, op1
, 1);
15815 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15816 && SYMBOL_REF_DLLIMPORT_P (op1
))
15817 op1
= legitimize_dllimport_symbol (op1
, false);
15819 else if (GET_CODE (op1
) == CONST
15820 && GET_CODE (XEXP (op1
, 0)) == PLUS
15821 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15823 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15824 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15827 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15829 tmp
= legitimize_tls_address (symbol
, model
, true);
15830 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15831 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15832 tmp
= legitimize_dllimport_symbol (symbol
, true);
15836 tmp
= force_operand (tmp
, NULL
);
15837 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15838 op0
, 1, OPTAB_DIRECT
);
15841 if (GET_MODE (tmp
) != mode
)
15842 op1
= convert_to_mode (mode
, tmp
, 1);
15846 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15847 && symbolic_operand (op1
, mode
))
15849 if (TARGET_MACHO
&& !TARGET_64BIT
)
15852 /* dynamic-no-pic */
15853 if (MACHOPIC_INDIRECT
)
15855 rtx temp
= ((reload_in_progress
15856 || ((op0
&& REG_P (op0
))
15858 ? op0
: gen_reg_rtx (Pmode
));
15859 op1
= machopic_indirect_data_reference (op1
, temp
);
15861 op1
= machopic_legitimize_pic_address (op1
, mode
,
15862 temp
== op1
? 0 : temp
);
15864 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15866 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15870 if (GET_CODE (op0
) == MEM
)
15871 op1
= force_reg (Pmode
, op1
);
15875 if (GET_CODE (temp
) != REG
)
15876 temp
= gen_reg_rtx (Pmode
);
15877 temp
= legitimize_pic_address (op1
, temp
);
15882 /* dynamic-no-pic */
15888 op1
= force_reg (mode
, op1
);
15889 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15891 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15892 op1
= legitimize_pic_address (op1
, reg
);
15895 if (GET_MODE (op1
) != mode
)
15896 op1
= convert_to_mode (mode
, op1
, 1);
15903 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15904 || !push_operand (op0
, mode
))
15906 op1
= force_reg (mode
, op1
);
15908 if (push_operand (op0
, mode
)
15909 && ! general_no_elim_operand (op1
, mode
))
15910 op1
= copy_to_mode_reg (mode
, op1
);
15912 /* Force large constants in 64bit compilation into register
15913 to get them CSEed. */
15914 if (can_create_pseudo_p ()
15915 && (mode
== DImode
) && TARGET_64BIT
15916 && immediate_operand (op1
, mode
)
15917 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15918 && !register_operand (op0
, mode
)
15920 op1
= copy_to_mode_reg (mode
, op1
);
15922 if (can_create_pseudo_p ()
15923 && FLOAT_MODE_P (mode
)
15924 && GET_CODE (op1
) == CONST_DOUBLE
)
15926 /* If we are loading a floating point constant to a register,
15927 force the value to memory now, since we'll get better code
15928 out the back end. */
15930 op1
= validize_mem (force_const_mem (mode
, op1
));
15931 if (!register_operand (op0
, mode
))
15933 rtx temp
= gen_reg_rtx (mode
);
15934 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15935 emit_move_insn (op0
, temp
);
15941 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15945 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15947 rtx op0
= operands
[0], op1
= operands
[1];
15948 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15950 /* Force constants other than zero into memory. We do not know how
15951 the instructions used to build constants modify the upper 64 bits
15952 of the register, once we have that information we may be able
15953 to handle some of them more efficiently. */
15954 if (can_create_pseudo_p ()
15955 && register_operand (op0
, mode
)
15956 && (CONSTANT_P (op1
)
15957 || (GET_CODE (op1
) == SUBREG
15958 && CONSTANT_P (SUBREG_REG (op1
))))
15959 && !standard_sse_constant_p (op1
))
15960 op1
= validize_mem (force_const_mem (mode
, op1
));
15962 /* We need to check memory alignment for SSE mode since attribute
15963 can make operands unaligned. */
15964 if (can_create_pseudo_p ()
15965 && SSE_REG_MODE_P (mode
)
15966 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15967 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15971 /* ix86_expand_vector_move_misalign() does not like constants ... */
15972 if (CONSTANT_P (op1
)
15973 || (GET_CODE (op1
) == SUBREG
15974 && CONSTANT_P (SUBREG_REG (op1
))))
15975 op1
= validize_mem (force_const_mem (mode
, op1
));
15977 /* ... nor both arguments in memory. */
15978 if (!register_operand (op0
, mode
)
15979 && !register_operand (op1
, mode
))
15980 op1
= force_reg (mode
, op1
);
15982 tmp
[0] = op0
; tmp
[1] = op1
;
15983 ix86_expand_vector_move_misalign (mode
, tmp
);
15987 /* Make operand1 a register if it isn't already. */
15988 if (can_create_pseudo_p ()
15989 && !register_operand (op0
, mode
)
15990 && !register_operand (op1
, mode
))
15992 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15996 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15999 /* Split 32-byte AVX unaligned load and store if needed. */
16002 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16005 rtx (*extract
) (rtx
, rtx
, rtx
);
16006 rtx (*load_unaligned
) (rtx
, rtx
);
16007 rtx (*store_unaligned
) (rtx
, rtx
);
16008 enum machine_mode mode
;
16010 switch (GET_MODE (op0
))
16013 gcc_unreachable ();
16015 extract
= gen_avx_vextractf128v32qi
;
16016 load_unaligned
= gen_avx_loaddqu256
;
16017 store_unaligned
= gen_avx_storedqu256
;
16021 extract
= gen_avx_vextractf128v8sf
;
16022 load_unaligned
= gen_avx_loadups256
;
16023 store_unaligned
= gen_avx_storeups256
;
16027 extract
= gen_avx_vextractf128v4df
;
16028 load_unaligned
= gen_avx_loadupd256
;
16029 store_unaligned
= gen_avx_storeupd256
;
16036 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16038 rtx r
= gen_reg_rtx (mode
);
16039 m
= adjust_address (op1
, mode
, 0);
16040 emit_move_insn (r
, m
);
16041 m
= adjust_address (op1
, mode
, 16);
16042 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16043 emit_move_insn (op0
, r
);
16046 emit_insn (load_unaligned (op0
, op1
));
16048 else if (MEM_P (op0
))
16050 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16052 m
= adjust_address (op0
, mode
, 0);
16053 emit_insn (extract (m
, op1
, const0_rtx
));
16054 m
= adjust_address (op0
, mode
, 16);
16055 emit_insn (extract (m
, op1
, const1_rtx
));
16058 emit_insn (store_unaligned (op0
, op1
));
16061 gcc_unreachable ();
16064 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16065 straight to ix86_expand_vector_move. */
16066 /* Code generation for scalar reg-reg moves of single and double precision data:
16067 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16071 if (x86_sse_partial_reg_dependency == true)
16076 Code generation for scalar loads of double precision data:
16077 if (x86_sse_split_regs == true)
16078 movlpd mem, reg (gas syntax)
16082 Code generation for unaligned packed loads of single precision data
16083 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16084 if (x86_sse_unaligned_move_optimal)
16087 if (x86_sse_partial_reg_dependency == true)
16099 Code generation for unaligned packed loads of double precision data
16100 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16101 if (x86_sse_unaligned_move_optimal)
16104 if (x86_sse_split_regs == true)
16117 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16125 && GET_MODE_SIZE (mode
) == 32)
16127 switch (GET_MODE_CLASS (mode
))
16129 case MODE_VECTOR_INT
:
16131 op0
= gen_lowpart (V32QImode
, op0
);
16132 op1
= gen_lowpart (V32QImode
, op1
);
16135 case MODE_VECTOR_FLOAT
:
16136 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16140 gcc_unreachable ();
16148 /* ??? If we have typed data, then it would appear that using
16149 movdqu is the only way to get unaligned data loaded with
16151 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16153 op0
= gen_lowpart (V16QImode
, op0
);
16154 op1
= gen_lowpart (V16QImode
, op1
);
16155 /* We will eventually emit movups based on insn attributes. */
16156 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16158 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16163 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16164 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16165 || optimize_function_for_size_p (cfun
))
16167 /* We will eventually emit movups based on insn attributes. */
16168 emit_insn (gen_sse2_loadupd (op0
, op1
));
16172 /* When SSE registers are split into halves, we can avoid
16173 writing to the top half twice. */
16174 if (TARGET_SSE_SPLIT_REGS
)
16176 emit_clobber (op0
);
16181 /* ??? Not sure about the best option for the Intel chips.
16182 The following would seem to satisfy; the register is
16183 entirely cleared, breaking the dependency chain. We
16184 then store to the upper half, with a dependency depth
16185 of one. A rumor has it that Intel recommends two movsd
16186 followed by an unpacklpd, but this is unconfirmed. And
16187 given that the dependency depth of the unpacklpd would
16188 still be one, I'm not sure why this would be better. */
16189 zero
= CONST0_RTX (V2DFmode
);
16192 m
= adjust_address (op1
, DFmode
, 0);
16193 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16194 m
= adjust_address (op1
, DFmode
, 8);
16195 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16200 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16201 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16202 || optimize_function_for_size_p (cfun
))
16204 op0
= gen_lowpart (V4SFmode
, op0
);
16205 op1
= gen_lowpart (V4SFmode
, op1
);
16206 emit_insn (gen_sse_loadups (op0
, op1
));
16210 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16211 emit_move_insn (op0
, CONST0_RTX (mode
));
16213 emit_clobber (op0
);
16215 if (mode
!= V4SFmode
)
16216 op0
= gen_lowpart (V4SFmode
, op0
);
16218 m
= adjust_address (op1
, V2SFmode
, 0);
16219 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16220 m
= adjust_address (op1
, V2SFmode
, 8);
16221 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16224 else if (MEM_P (op0
))
16226 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16228 op0
= gen_lowpart (V16QImode
, op0
);
16229 op1
= gen_lowpart (V16QImode
, op1
);
16230 /* We will eventually emit movups based on insn attributes. */
16231 emit_insn (gen_sse2_storedqu (op0
, op1
));
16233 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16236 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16238 || optimize_function_for_size_p (cfun
))
16239 /* We will eventually emit movups based on insn attributes. */
16240 emit_insn (gen_sse2_storeupd (op0
, op1
));
16243 m
= adjust_address (op0
, DFmode
, 0);
16244 emit_insn (gen_sse2_storelpd (m
, op1
));
16245 m
= adjust_address (op0
, DFmode
, 8);
16246 emit_insn (gen_sse2_storehpd (m
, op1
));
16251 if (mode
!= V4SFmode
)
16252 op1
= gen_lowpart (V4SFmode
, op1
);
16255 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16256 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16257 || optimize_function_for_size_p (cfun
))
16259 op0
= gen_lowpart (V4SFmode
, op0
);
16260 emit_insn (gen_sse_storeups (op0
, op1
));
16264 m
= adjust_address (op0
, V2SFmode
, 0);
16265 emit_insn (gen_sse_storelps (m
, op1
));
16266 m
= adjust_address (op0
, V2SFmode
, 8);
16267 emit_insn (gen_sse_storehps (m
, op1
));
16272 gcc_unreachable ();
16275 /* Expand a push in MODE. This is some mode for which we do not support
16276 proper push instructions, at least from the registers that we expect
16277 the value to live in. */
16280 ix86_expand_push (enum machine_mode mode
, rtx x
)
16284 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16285 GEN_INT (-GET_MODE_SIZE (mode
)),
16286 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16287 if (tmp
!= stack_pointer_rtx
)
16288 emit_move_insn (stack_pointer_rtx
, tmp
);
16290 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16292 /* When we push an operand onto stack, it has to be aligned at least
16293 at the function argument boundary. However since we don't have
16294 the argument type, we can't determine the actual argument
16296 emit_move_insn (tmp
, x
);
16299 /* Helper function of ix86_fixup_binary_operands to canonicalize
16300 operand order. Returns true if the operands should be swapped. */
16303 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16306 rtx dst
= operands
[0];
16307 rtx src1
= operands
[1];
16308 rtx src2
= operands
[2];
16310 /* If the operation is not commutative, we can't do anything. */
16311 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16314 /* Highest priority is that src1 should match dst. */
16315 if (rtx_equal_p (dst
, src1
))
16317 if (rtx_equal_p (dst
, src2
))
16320 /* Next highest priority is that immediate constants come second. */
16321 if (immediate_operand (src2
, mode
))
16323 if (immediate_operand (src1
, mode
))
16326 /* Lowest priority is that memory references should come second. */
16336 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16337 destination to use for the operation. If different from the true
16338 destination in operands[0], a copy operation will be required. */
16341 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16344 rtx dst
= operands
[0];
16345 rtx src1
= operands
[1];
16346 rtx src2
= operands
[2];
16348 /* Canonicalize operand order. */
16349 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16353 /* It is invalid to swap operands of different modes. */
16354 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16361 /* Both source operands cannot be in memory. */
16362 if (MEM_P (src1
) && MEM_P (src2
))
16364 /* Optimization: Only read from memory once. */
16365 if (rtx_equal_p (src1
, src2
))
16367 src2
= force_reg (mode
, src2
);
16371 src2
= force_reg (mode
, src2
);
16374 /* If the destination is memory, and we do not have matching source
16375 operands, do things in registers. */
16376 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16377 dst
= gen_reg_rtx (mode
);
16379 /* Source 1 cannot be a constant. */
16380 if (CONSTANT_P (src1
))
16381 src1
= force_reg (mode
, src1
);
16383 /* Source 1 cannot be a non-matching memory. */
16384 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16385 src1
= force_reg (mode
, src1
);
16387 /* Improve address combine. */
16389 && GET_MODE_CLASS (mode
) == MODE_INT
16391 src2
= force_reg (mode
, src2
);
16393 operands
[1] = src1
;
16394 operands
[2] = src2
;
16398 /* Similarly, but assume that the destination has already been
16399 set up properly. */
16402 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16403 enum machine_mode mode
, rtx operands
[])
16405 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16406 gcc_assert (dst
== operands
[0]);
16409 /* Attempt to expand a binary operator. Make the expansion closer to the
16410 actual machine, then just general_operand, which will allow 3 separate
16411 memory references (one output, two input) in a single insn. */
16414 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16417 rtx src1
, src2
, dst
, op
, clob
;
16419 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16420 src1
= operands
[1];
16421 src2
= operands
[2];
16423 /* Emit the instruction. */
16425 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16426 if (reload_in_progress
)
16428 /* Reload doesn't know about the flags register, and doesn't know that
16429 it doesn't want to clobber it. We can only do this with PLUS. */
16430 gcc_assert (code
== PLUS
);
16433 else if (reload_completed
16435 && !rtx_equal_p (dst
, src1
))
16437 /* This is going to be an LEA; avoid splitting it later. */
16442 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16443 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16446 /* Fix up the destination if needed. */
16447 if (dst
!= operands
[0])
16448 emit_move_insn (operands
[0], dst
);
16451 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16452 the given OPERANDS. */
16455 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16458 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16459 if (GET_CODE (operands
[1]) == SUBREG
)
16464 else if (GET_CODE (operands
[2]) == SUBREG
)
16469 /* Optimize (__m128i) d | (__m128i) e and similar code
16470 when d and e are float vectors into float vector logical
16471 insn. In C/C++ without using intrinsics there is no other way
16472 to express vector logical operation on float vectors than
16473 to cast them temporarily to integer vectors. */
16475 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16476 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16477 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16478 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16479 && SUBREG_BYTE (op1
) == 0
16480 && (GET_CODE (op2
) == CONST_VECTOR
16481 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16482 && SUBREG_BYTE (op2
) == 0))
16483 && can_create_pseudo_p ())
16486 switch (GET_MODE (SUBREG_REG (op1
)))
16492 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16493 if (GET_CODE (op2
) == CONST_VECTOR
)
16495 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16496 op2
= force_reg (GET_MODE (dst
), op2
);
16501 op2
= SUBREG_REG (operands
[2]);
16502 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16503 op2
= force_reg (GET_MODE (dst
), op2
);
16505 op1
= SUBREG_REG (op1
);
16506 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16507 op1
= force_reg (GET_MODE (dst
), op1
);
16508 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16509 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16511 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
16517 if (!nonimmediate_operand (operands
[1], mode
))
16518 operands
[1] = force_reg (mode
, operands
[1]);
16519 if (!nonimmediate_operand (operands
[2], mode
))
16520 operands
[2] = force_reg (mode
, operands
[2]);
16521 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16522 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16523 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16527 /* Return TRUE or FALSE depending on whether the binary operator meets the
16528 appropriate constraints. */
16531 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16534 rtx dst
= operands
[0];
16535 rtx src1
= operands
[1];
16536 rtx src2
= operands
[2];
16538 /* Both source operands cannot be in memory. */
16539 if (MEM_P (src1
) && MEM_P (src2
))
16542 /* Canonicalize operand order for commutative operators. */
16543 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16550 /* If the destination is memory, we must have a matching source operand. */
16551 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16554 /* Source 1 cannot be a constant. */
16555 if (CONSTANT_P (src1
))
16558 /* Source 1 cannot be a non-matching memory. */
16559 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16560 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16561 return (code
== AND
16564 || (TARGET_64BIT
&& mode
== DImode
))
16565 && satisfies_constraint_L (src2
));
16570 /* Attempt to expand a unary operator. Make the expansion closer to the
16571 actual machine, then just general_operand, which will allow 2 separate
16572 memory references (one output, one input) in a single insn. */
16575 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16578 int matching_memory
;
16579 rtx src
, dst
, op
, clob
;
16584 /* If the destination is memory, and we do not have matching source
16585 operands, do things in registers. */
16586 matching_memory
= 0;
16589 if (rtx_equal_p (dst
, src
))
16590 matching_memory
= 1;
16592 dst
= gen_reg_rtx (mode
);
16595 /* When source operand is memory, destination must match. */
16596 if (MEM_P (src
) && !matching_memory
)
16597 src
= force_reg (mode
, src
);
16599 /* Emit the instruction. */
16601 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16602 if (reload_in_progress
|| code
== NOT
)
16604 /* Reload doesn't know about the flags register, and doesn't know that
16605 it doesn't want to clobber it. */
16606 gcc_assert (code
== NOT
);
16611 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16612 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16615 /* Fix up the destination if needed. */
16616 if (dst
!= operands
[0])
16617 emit_move_insn (operands
[0], dst
);
16620 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16621 divisor are within the range [0-255]. */
16624 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16627 rtx end_label
, qimode_label
;
16628 rtx insn
, div
, mod
;
16629 rtx scratch
, tmp0
, tmp1
, tmp2
;
16630 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16631 rtx (*gen_zero_extend
) (rtx
, rtx
);
16632 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16637 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16638 gen_test_ccno_1
= gen_testsi_ccno_1
;
16639 gen_zero_extend
= gen_zero_extendqisi2
;
16642 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16643 gen_test_ccno_1
= gen_testdi_ccno_1
;
16644 gen_zero_extend
= gen_zero_extendqidi2
;
16647 gcc_unreachable ();
16650 end_label
= gen_label_rtx ();
16651 qimode_label
= gen_label_rtx ();
16653 scratch
= gen_reg_rtx (mode
);
16655 /* Use 8bit unsigned divimod if dividend and divisor are within
16656 the range [0-255]. */
16657 emit_move_insn (scratch
, operands
[2]);
16658 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16659 scratch
, 1, OPTAB_DIRECT
);
16660 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16661 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16662 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16663 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16664 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16666 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16667 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16668 JUMP_LABEL (insn
) = qimode_label
;
16670 /* Generate original signed/unsigned divimod. */
16671 div
= gen_divmod4_1 (operands
[0], operands
[1],
16672 operands
[2], operands
[3]);
16675 /* Branch to the end. */
16676 emit_jump_insn (gen_jump (end_label
));
16679 /* Generate 8bit unsigned divide. */
16680 emit_label (qimode_label
);
16681 /* Don't use operands[0] for result of 8bit divide since not all
16682 registers support QImode ZERO_EXTRACT. */
16683 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16684 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16685 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16686 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16690 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16691 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16695 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16696 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16699 /* Extract remainder from AH. */
16700 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16701 if (REG_P (operands
[1]))
16702 insn
= emit_move_insn (operands
[1], tmp1
);
16705 /* Need a new scratch register since the old one has result
16707 scratch
= gen_reg_rtx (mode
);
16708 emit_move_insn (scratch
, tmp1
);
16709 insn
= emit_move_insn (operands
[1], scratch
);
16711 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16713 /* Zero extend quotient from AL. */
16714 tmp1
= gen_lowpart (QImode
, tmp0
);
16715 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16716 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16718 emit_label (end_label
);
16721 #define LEA_MAX_STALL (3)
16722 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16724 /* Increase given DISTANCE in half-cycles according to
16725 dependencies between PREV and NEXT instructions.
16726 Add 1 half-cycle if there is no dependency and
16727 go to next cycle if there is some dependecy. */
16729 static unsigned int
16730 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16735 if (!prev
|| !next
)
16736 return distance
+ (distance
& 1) + 2;
16738 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16739 return distance
+ 1;
16741 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16742 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16743 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16744 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16745 return distance
+ (distance
& 1) + 2;
16747 return distance
+ 1;
16750 /* Function checks if instruction INSN defines register number
16751 REGNO1 or REGNO2. */
16754 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16759 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16760 if (DF_REF_REG_DEF_P (*def_rec
)
16761 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16762 && (regno1
== DF_REF_REGNO (*def_rec
)
16763 || regno2
== DF_REF_REGNO (*def_rec
)))
16771 /* Function checks if instruction INSN uses register number
16772 REGNO as a part of address expression. */
16775 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16779 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16780 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16786 /* Search backward for non-agu definition of register number REGNO1
16787 or register number REGNO2 in basic block starting from instruction
16788 START up to head of basic block or instruction INSN.
16790 Function puts true value into *FOUND var if definition was found
16791 and false otherwise.
16793 Distance in half-cycles between START and found instruction or head
16794 of BB is added to DISTANCE and returned. */
16797 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16798 rtx insn
, int distance
,
16799 rtx start
, bool *found
)
16801 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16809 && distance
< LEA_SEARCH_THRESHOLD
)
16811 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16813 distance
= increase_distance (prev
, next
, distance
);
16814 if (insn_defines_reg (regno1
, regno2
, prev
))
16816 if (recog_memoized (prev
) < 0
16817 || get_attr_type (prev
) != TYPE_LEA
)
16826 if (prev
== BB_HEAD (bb
))
16829 prev
= PREV_INSN (prev
);
16835 /* Search backward for non-agu definition of register number REGNO1
16836 or register number REGNO2 in INSN's basic block until
16837 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16838 2. Reach neighbour BBs boundary, or
16839 3. Reach agu definition.
16840 Returns the distance between the non-agu definition point and INSN.
16841 If no definition point, returns -1. */
16844 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16847 basic_block bb
= BLOCK_FOR_INSN (insn
);
16849 bool found
= false;
16851 if (insn
!= BB_HEAD (bb
))
16852 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16853 distance
, PREV_INSN (insn
),
16856 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16860 bool simple_loop
= false;
16862 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16865 simple_loop
= true;
16870 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16872 BB_END (bb
), &found
);
16875 int shortest_dist
= -1;
16876 bool found_in_bb
= false;
16878 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16881 = distance_non_agu_define_in_bb (regno1
, regno2
,
16887 if (shortest_dist
< 0)
16888 shortest_dist
= bb_dist
;
16889 else if (bb_dist
> 0)
16890 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16896 distance
= shortest_dist
;
16900 /* get_attr_type may modify recog data. We want to make sure
16901 that recog data is valid for instruction INSN, on which
16902 distance_non_agu_define is called. INSN is unchanged here. */
16903 extract_insn_cached (insn
);
16908 return distance
>> 1;
16911 /* Return the distance in half-cycles between INSN and the next
16912 insn that uses register number REGNO in memory address added
16913 to DISTANCE. Return -1 if REGNO0 is set.
16915 Put true value into *FOUND if register usage was found and
16917 Put true value into *REDEFINED if register redefinition was
16918 found and false otherwise. */
16921 distance_agu_use_in_bb (unsigned int regno
,
16922 rtx insn
, int distance
, rtx start
,
16923 bool *found
, bool *redefined
)
16925 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16930 *redefined
= false;
16934 && distance
< LEA_SEARCH_THRESHOLD
)
16936 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16938 distance
= increase_distance(prev
, next
, distance
);
16939 if (insn_uses_reg_mem (regno
, next
))
16941 /* Return DISTANCE if OP0 is used in memory
16942 address in NEXT. */
16947 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16949 /* Return -1 if OP0 is set in NEXT. */
16957 if (next
== BB_END (bb
))
16960 next
= NEXT_INSN (next
);
16966 /* Return the distance between INSN and the next insn that uses
16967 register number REGNO0 in memory address. Return -1 if no such
16968 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16971 distance_agu_use (unsigned int regno0
, rtx insn
)
16973 basic_block bb
= BLOCK_FOR_INSN (insn
);
16975 bool found
= false;
16976 bool redefined
= false;
16978 if (insn
!= BB_END (bb
))
16979 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16981 &found
, &redefined
);
16983 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16987 bool simple_loop
= false;
16989 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16992 simple_loop
= true;
16997 distance
= distance_agu_use_in_bb (regno0
, insn
,
16998 distance
, BB_HEAD (bb
),
16999 &found
, &redefined
);
17002 int shortest_dist
= -1;
17003 bool found_in_bb
= false;
17004 bool redefined_in_bb
= false;
17006 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17009 = distance_agu_use_in_bb (regno0
, insn
,
17010 distance
, BB_HEAD (e
->dest
),
17011 &found_in_bb
, &redefined_in_bb
);
17014 if (shortest_dist
< 0)
17015 shortest_dist
= bb_dist
;
17016 else if (bb_dist
> 0)
17017 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17023 distance
= shortest_dist
;
17027 if (!found
|| redefined
)
17030 return distance
>> 1;
17033 /* Define this macro to tune LEA priority vs ADD, it take effect when
17034 there is a dilemma of choicing LEA or ADD
17035 Negative value: ADD is more preferred than LEA
17037 Positive value: LEA is more preferred than ADD*/
17038 #define IX86_LEA_PRIORITY 0
17040 /* Return true if usage of lea INSN has performance advantage
17041 over a sequence of instructions. Instructions sequence has
17042 SPLIT_COST cycles higher latency than lea latency. */
17045 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17046 unsigned int regno2
, int split_cost
)
17048 int dist_define
, dist_use
;
17050 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17051 dist_use
= distance_agu_use (regno0
, insn
);
17053 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17055 /* If there is no non AGU operand definition, no AGU
17056 operand usage and split cost is 0 then both lea
17057 and non lea variants have same priority. Currently
17058 we prefer lea for 64 bit code and non lea on 32 bit
17060 if (dist_use
< 0 && split_cost
== 0)
17061 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17066 /* With longer definitions distance lea is more preferable.
17067 Here we change it to take into account splitting cost and
17069 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17071 /* If there is no use in memory addess then we just check
17072 that split cost exceeds AGU stall. */
17074 return dist_define
> LEA_MAX_STALL
;
17076 /* If this insn has both backward non-agu dependence and forward
17077 agu dependence, the one with short distance takes effect. */
17078 return dist_define
>= dist_use
;
17081 /* Return true if it is legal to clobber flags by INSN and
17082 false otherwise. */
17085 ix86_ok_to_clobber_flags (rtx insn
)
17087 basic_block bb
= BLOCK_FOR_INSN (insn
);
17093 if (NONDEBUG_INSN_P (insn
))
17095 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17096 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17099 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17103 if (insn
== BB_END (bb
))
17106 insn
= NEXT_INSN (insn
);
17109 live
= df_get_live_out(bb
);
17110 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17113 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17114 move and add to avoid AGU stalls. */
17117 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17119 unsigned int regno0
, regno1
, regno2
;
17121 /* Check if we need to optimize. */
17122 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17125 /* Check it is correct to split here. */
17126 if (!ix86_ok_to_clobber_flags(insn
))
17129 regno0
= true_regnum (operands
[0]);
17130 regno1
= true_regnum (operands
[1]);
17131 regno2
= true_regnum (operands
[2]);
17133 /* We need to split only adds with non destructive
17134 destination operand. */
17135 if (regno0
== regno1
|| regno0
== regno2
)
17138 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17141 /* Return true if we should emit lea instruction instead of mov
17145 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17147 unsigned int regno0
, regno1
;
17149 /* Check if we need to optimize. */
17150 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17153 /* Use lea for reg to reg moves only. */
17154 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17157 regno0
= true_regnum (operands
[0]);
17158 regno1
= true_regnum (operands
[1]);
17160 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17163 /* Return true if we need to split lea into a sequence of
17164 instructions to avoid AGU stalls. */
17167 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17169 unsigned int regno0
, regno1
, regno2
;
17171 struct ix86_address parts
;
17174 /* Check we need to optimize. */
17175 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17178 /* Check it is correct to split here. */
17179 if (!ix86_ok_to_clobber_flags(insn
))
17182 ok
= ix86_decompose_address (operands
[1], &parts
);
17185 /* There should be at least two components in the address. */
17186 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17187 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17190 /* We should not split into add if non legitimate pic
17191 operand is used as displacement. */
17192 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17195 regno0
= true_regnum (operands
[0]) ;
17196 regno1
= INVALID_REGNUM
;
17197 regno2
= INVALID_REGNUM
;
17200 regno1
= true_regnum (parts
.base
);
17202 regno2
= true_regnum (parts
.index
);
17206 /* Compute how many cycles we will add to execution time
17207 if split lea into a sequence of instructions. */
17208 if (parts
.base
|| parts
.index
)
17210 /* Have to use mov instruction if non desctructive
17211 destination form is used. */
17212 if (regno1
!= regno0
&& regno2
!= regno0
)
17215 /* Have to add index to base if both exist. */
17216 if (parts
.base
&& parts
.index
)
17219 /* Have to use shift and adds if scale is 2 or greater. */
17220 if (parts
.scale
> 1)
17222 if (regno0
!= regno1
)
17224 else if (regno2
== regno0
)
17227 split_cost
+= parts
.scale
;
17230 /* Have to use add instruction with immediate if
17231 disp is non zero. */
17232 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17235 /* Subtract the price of lea. */
17239 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17242 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17243 matches destination. RTX includes clobber of FLAGS_REG. */
17246 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17251 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17252 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17254 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17257 /* Return true if regno1 def is nearest to the insn. */
17260 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17263 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17267 while (prev
&& prev
!= start
)
17269 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17271 prev
= PREV_INSN (prev
);
17274 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17276 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17278 prev
= PREV_INSN (prev
);
17281 /* None of the regs is defined in the bb. */
17285 /* Split lea instructions into a sequence of instructions
17286 which are executed on ALU to avoid AGU stalls.
17287 It is assumed that it is allowed to clobber flags register
17288 at lea position. */
17291 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17293 unsigned int regno0
, regno1
, regno2
;
17294 struct ix86_address parts
;
17298 ok
= ix86_decompose_address (operands
[1], &parts
);
17301 target
= gen_lowpart (mode
, operands
[0]);
17303 regno0
= true_regnum (target
);
17304 regno1
= INVALID_REGNUM
;
17305 regno2
= INVALID_REGNUM
;
17309 parts
.base
= gen_lowpart (mode
, parts
.base
);
17310 regno1
= true_regnum (parts
.base
);
17315 parts
.index
= gen_lowpart (mode
, parts
.index
);
17316 regno2
= true_regnum (parts
.index
);
17320 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17322 if (parts
.scale
> 1)
17324 /* Case r1 = r1 + ... */
17325 if (regno1
== regno0
)
17327 /* If we have a case r1 = r1 + C * r1 then we
17328 should use multiplication which is very
17329 expensive. Assume cost model is wrong if we
17330 have such case here. */
17331 gcc_assert (regno2
!= regno0
);
17333 for (adds
= parts
.scale
; adds
> 0; adds
--)
17334 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17338 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17339 if (regno0
!= regno2
)
17340 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17342 /* Use shift for scaling. */
17343 ix86_emit_binop (ASHIFT
, mode
, target
,
17344 GEN_INT (exact_log2 (parts
.scale
)));
17347 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17349 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17350 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17353 else if (!parts
.base
&& !parts
.index
)
17355 gcc_assert(parts
.disp
);
17356 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17362 if (regno0
!= regno2
)
17363 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17365 else if (!parts
.index
)
17367 if (regno0
!= regno1
)
17368 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17372 if (regno0
== regno1
)
17374 else if (regno0
== regno2
)
17380 /* Find better operand for SET instruction, depending
17381 on which definition is farther from the insn. */
17382 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17383 tmp
= parts
.index
, tmp1
= parts
.base
;
17385 tmp
= parts
.base
, tmp1
= parts
.index
;
17387 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17389 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17390 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17392 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17396 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17399 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17400 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17404 /* Return true if it is ok to optimize an ADD operation to LEA
17405 operation to avoid flag register consumation. For most processors,
17406 ADD is faster than LEA. For the processors like ATOM, if the
17407 destination register of LEA holds an actual address which will be
17408 used soon, LEA is better and otherwise ADD is better. */
17411 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17413 unsigned int regno0
= true_regnum (operands
[0]);
17414 unsigned int regno1
= true_regnum (operands
[1]);
17415 unsigned int regno2
= true_regnum (operands
[2]);
17417 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17418 if (regno0
!= regno1
&& regno0
!= regno2
)
17421 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17424 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17427 /* Return true if destination reg of SET_BODY is shift count of
17431 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17437 /* Retrieve destination of SET_BODY. */
17438 switch (GET_CODE (set_body
))
17441 set_dest
= SET_DEST (set_body
);
17442 if (!set_dest
|| !REG_P (set_dest
))
17446 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17447 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17455 /* Retrieve shift count of USE_BODY. */
17456 switch (GET_CODE (use_body
))
17459 shift_rtx
= XEXP (use_body
, 1);
17462 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17463 if (ix86_dep_by_shift_count_body (set_body
,
17464 XVECEXP (use_body
, 0, i
)))
17472 && (GET_CODE (shift_rtx
) == ASHIFT
17473 || GET_CODE (shift_rtx
) == LSHIFTRT
17474 || GET_CODE (shift_rtx
) == ASHIFTRT
17475 || GET_CODE (shift_rtx
) == ROTATE
17476 || GET_CODE (shift_rtx
) == ROTATERT
))
17478 rtx shift_count
= XEXP (shift_rtx
, 1);
17480 /* Return true if shift count is dest of SET_BODY. */
17481 if (REG_P (shift_count
))
17483 /* Add check since it can be invoked before register
17484 allocation in pre-reload schedule. */
17485 if (reload_completed
17486 && true_regnum (set_dest
) == true_regnum (shift_count
))
17488 else if (REGNO(set_dest
) == REGNO(shift_count
))
17496 /* Return true if destination reg of SET_INSN is shift count of
17500 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17502 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17503 PATTERN (use_insn
));
17506 /* Return TRUE or FALSE depending on whether the unary operator meets the
17507 appropriate constraints. */
17510 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17511 enum machine_mode mode ATTRIBUTE_UNUSED
,
17512 rtx operands
[2] ATTRIBUTE_UNUSED
)
17514 /* If one of operands is memory, source and destination must match. */
17515 if ((MEM_P (operands
[0])
17516 || MEM_P (operands
[1]))
17517 && ! rtx_equal_p (operands
[0], operands
[1]))
17522 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17523 are ok, keeping in mind the possible movddup alternative. */
17526 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17528 if (MEM_P (operands
[0]))
17529 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17530 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17531 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17535 /* Post-reload splitter for converting an SF or DFmode value in an
17536 SSE register into an unsigned SImode. */
17539 ix86_split_convert_uns_si_sse (rtx operands
[])
17541 enum machine_mode vecmode
;
17542 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17544 large
= operands
[1];
17545 zero_or_two31
= operands
[2];
17546 input
= operands
[3];
17547 two31
= operands
[4];
17548 vecmode
= GET_MODE (large
);
17549 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17551 /* Load up the value into the low element. We must ensure that the other
17552 elements are valid floats -- zero is the easiest such value. */
17555 if (vecmode
== V4SFmode
)
17556 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17558 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17562 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17563 emit_move_insn (value
, CONST0_RTX (vecmode
));
17564 if (vecmode
== V4SFmode
)
17565 emit_insn (gen_sse_movss (value
, value
, input
));
17567 emit_insn (gen_sse2_movsd (value
, value
, input
));
17570 emit_move_insn (large
, two31
);
17571 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17573 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17574 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17576 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17577 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17579 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17580 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17582 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17583 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17585 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17586 if (vecmode
== V4SFmode
)
17587 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17589 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17592 emit_insn (gen_xorv4si3 (value
, value
, large
));
17595 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17596 Expects the 64-bit DImode to be supplied in a pair of integral
17597 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17598 -mfpmath=sse, !optimize_size only. */
17601 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17603 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17604 rtx int_xmm
, fp_xmm
;
17605 rtx biases
, exponents
;
17608 int_xmm
= gen_reg_rtx (V4SImode
);
17609 if (TARGET_INTER_UNIT_MOVES
)
17610 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17611 else if (TARGET_SSE_SPLIT_REGS
)
17613 emit_clobber (int_xmm
);
17614 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17618 x
= gen_reg_rtx (V2DImode
);
17619 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17620 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17623 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17624 gen_rtvec (4, GEN_INT (0x43300000UL
),
17625 GEN_INT (0x45300000UL
),
17626 const0_rtx
, const0_rtx
));
17627 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17629 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17630 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17632 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17633 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17634 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17635 (0x1.0p84 + double(fp_value_hi_xmm)).
17636 Note these exponents differ by 32. */
17638 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17640 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17641 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17642 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17643 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17644 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17645 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17646 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17647 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17648 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17650 /* Add the upper and lower DFmode values together. */
17652 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17655 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17656 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17657 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17660 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17663 /* Not used, but eases macroization of patterns. */
17665 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17666 rtx input ATTRIBUTE_UNUSED
)
17668 gcc_unreachable ();
17671 /* Convert an unsigned SImode value into a DFmode. Only currently used
17672 for SSE, but applicable anywhere. */
17675 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17677 REAL_VALUE_TYPE TWO31r
;
17680 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17681 NULL
, 1, OPTAB_DIRECT
);
17683 fp
= gen_reg_rtx (DFmode
);
17684 emit_insn (gen_floatsidf2 (fp
, x
));
17686 real_ldexp (&TWO31r
, &dconst1
, 31);
17687 x
= const_double_from_real_value (TWO31r
, DFmode
);
17689 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17691 emit_move_insn (target
, x
);
17694 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17695 32-bit mode; otherwise we have a direct convert instruction. */
17698 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17700 REAL_VALUE_TYPE TWO32r
;
17701 rtx fp_lo
, fp_hi
, x
;
17703 fp_lo
= gen_reg_rtx (DFmode
);
17704 fp_hi
= gen_reg_rtx (DFmode
);
17706 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17708 real_ldexp (&TWO32r
, &dconst1
, 32);
17709 x
= const_double_from_real_value (TWO32r
, DFmode
);
17710 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17712 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17714 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17717 emit_move_insn (target
, x
);
17720 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17721 For x86_32, -mfpmath=sse, !optimize_size only. */
17723 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17725 REAL_VALUE_TYPE ONE16r
;
17726 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17728 real_ldexp (&ONE16r
, &dconst1
, 16);
17729 x
= const_double_from_real_value (ONE16r
, SFmode
);
17730 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17731 NULL
, 0, OPTAB_DIRECT
);
17732 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17733 NULL
, 0, OPTAB_DIRECT
);
17734 fp_hi
= gen_reg_rtx (SFmode
);
17735 fp_lo
= gen_reg_rtx (SFmode
);
17736 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17737 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17738 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17740 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17742 if (!rtx_equal_p (target
, fp_hi
))
17743 emit_move_insn (target
, fp_hi
);
17746 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17747 a vector of unsigned ints VAL to vector of floats TARGET. */
17750 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17753 REAL_VALUE_TYPE TWO16r
;
17754 enum machine_mode intmode
= GET_MODE (val
);
17755 enum machine_mode fltmode
= GET_MODE (target
);
17756 rtx (*cvt
) (rtx
, rtx
);
17758 if (intmode
== V4SImode
)
17759 cvt
= gen_floatv4siv4sf2
;
17761 cvt
= gen_floatv8siv8sf2
;
17762 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17763 tmp
[0] = force_reg (intmode
, tmp
[0]);
17764 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17766 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17767 NULL_RTX
, 1, OPTAB_DIRECT
);
17768 tmp
[3] = gen_reg_rtx (fltmode
);
17769 emit_insn (cvt (tmp
[3], tmp
[1]));
17770 tmp
[4] = gen_reg_rtx (fltmode
);
17771 emit_insn (cvt (tmp
[4], tmp
[2]));
17772 real_ldexp (&TWO16r
, &dconst1
, 16);
17773 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17774 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17775 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17777 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17779 if (tmp
[7] != target
)
17780 emit_move_insn (target
, tmp
[7]);
17783 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17784 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17785 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17786 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17789 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17791 REAL_VALUE_TYPE TWO31r
;
17792 rtx two31r
, tmp
[4];
17793 enum machine_mode mode
= GET_MODE (val
);
17794 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17795 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17796 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17799 for (i
= 0; i
< 3; i
++)
17800 tmp
[i
] = gen_reg_rtx (mode
);
17801 real_ldexp (&TWO31r
, &dconst1
, 31);
17802 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17803 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17804 two31r
= force_reg (mode
, two31r
);
17807 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17808 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17809 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17810 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17811 default: gcc_unreachable ();
17813 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17814 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17815 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17817 if (intmode
== V4SImode
|| TARGET_AVX2
)
17818 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17819 gen_lowpart (intmode
, tmp
[0]),
17820 GEN_INT (31), NULL_RTX
, 0,
17824 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17825 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17826 *xorp
= expand_simple_binop (intmode
, AND
,
17827 gen_lowpart (intmode
, tmp
[0]),
17828 two31
, NULL_RTX
, 0,
17831 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17835 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17836 then replicate the value for all elements of the vector
17840 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17844 enum machine_mode scalar_mode
;
17861 n_elt
= GET_MODE_NUNITS (mode
);
17862 v
= rtvec_alloc (n_elt
);
17863 scalar_mode
= GET_MODE_INNER (mode
);
17865 RTVEC_ELT (v
, 0) = value
;
17867 for (i
= 1; i
< n_elt
; ++i
)
17868 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17870 return gen_rtx_CONST_VECTOR (mode
, v
);
17873 gcc_unreachable ();
17877 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17878 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17879 for an SSE register. If VECT is true, then replicate the mask for
17880 all elements of the vector register. If INVERT is true, then create
17881 a mask excluding the sign bit. */
17884 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17886 enum machine_mode vec_mode
, imode
;
17887 HOST_WIDE_INT hi
, lo
;
17892 /* Find the sign bit, sign extended to 2*HWI. */
17900 mode
= GET_MODE_INNER (mode
);
17902 lo
= 0x80000000, hi
= lo
< 0;
17910 mode
= GET_MODE_INNER (mode
);
17912 if (HOST_BITS_PER_WIDE_INT
>= 64)
17913 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17915 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17920 vec_mode
= VOIDmode
;
17921 if (HOST_BITS_PER_WIDE_INT
>= 64)
17924 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17931 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17935 lo
= ~lo
, hi
= ~hi
;
17941 mask
= immed_double_const (lo
, hi
, imode
);
17943 vec
= gen_rtvec (2, v
, mask
);
17944 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17945 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17952 gcc_unreachable ();
17956 lo
= ~lo
, hi
= ~hi
;
17958 /* Force this value into the low part of a fp vector constant. */
17959 mask
= immed_double_const (lo
, hi
, imode
);
17960 mask
= gen_lowpart (mode
, mask
);
17962 if (vec_mode
== VOIDmode
)
17963 return force_reg (mode
, mask
);
17965 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17966 return force_reg (vec_mode
, v
);
17969 /* Generate code for floating point ABS or NEG. */
17972 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17975 rtx mask
, set
, dst
, src
;
17976 bool use_sse
= false;
17977 bool vector_mode
= VECTOR_MODE_P (mode
);
17978 enum machine_mode vmode
= mode
;
17982 else if (mode
== TFmode
)
17984 else if (TARGET_SSE_MATH
)
17986 use_sse
= SSE_FLOAT_MODE_P (mode
);
17987 if (mode
== SFmode
)
17989 else if (mode
== DFmode
)
17993 /* NEG and ABS performed with SSE use bitwise mask operations.
17994 Create the appropriate mask now. */
17996 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18003 set
= gen_rtx_fmt_e (code
, mode
, src
);
18004 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18011 use
= gen_rtx_USE (VOIDmode
, mask
);
18013 par
= gen_rtvec (2, set
, use
);
18016 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18017 par
= gen_rtvec (3, set
, use
, clob
);
18019 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18025 /* Expand a copysign operation. Special case operand 0 being a constant. */
18028 ix86_expand_copysign (rtx operands
[])
18030 enum machine_mode mode
, vmode
;
18031 rtx dest
, op0
, op1
, mask
, nmask
;
18033 dest
= operands
[0];
18037 mode
= GET_MODE (dest
);
18039 if (mode
== SFmode
)
18041 else if (mode
== DFmode
)
18046 if (GET_CODE (op0
) == CONST_DOUBLE
)
18048 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18050 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18051 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18053 if (mode
== SFmode
|| mode
== DFmode
)
18055 if (op0
== CONST0_RTX (mode
))
18056 op0
= CONST0_RTX (vmode
);
18059 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18061 op0
= force_reg (vmode
, v
);
18064 else if (op0
!= CONST0_RTX (mode
))
18065 op0
= force_reg (mode
, op0
);
18067 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18069 if (mode
== SFmode
)
18070 copysign_insn
= gen_copysignsf3_const
;
18071 else if (mode
== DFmode
)
18072 copysign_insn
= gen_copysigndf3_const
;
18074 copysign_insn
= gen_copysigntf3_const
;
18076 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18080 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18082 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18083 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18085 if (mode
== SFmode
)
18086 copysign_insn
= gen_copysignsf3_var
;
18087 else if (mode
== DFmode
)
18088 copysign_insn
= gen_copysigndf3_var
;
18090 copysign_insn
= gen_copysigntf3_var
;
18092 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18096 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18097 be a constant, and so has already been expanded into a vector constant. */
18100 ix86_split_copysign_const (rtx operands
[])
18102 enum machine_mode mode
, vmode
;
18103 rtx dest
, op0
, mask
, x
;
18105 dest
= operands
[0];
18107 mask
= operands
[3];
18109 mode
= GET_MODE (dest
);
18110 vmode
= GET_MODE (mask
);
18112 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18113 x
= gen_rtx_AND (vmode
, dest
, mask
);
18114 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18116 if (op0
!= CONST0_RTX (vmode
))
18118 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18119 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18123 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18124 so we have to do two masks. */
18127 ix86_split_copysign_var (rtx operands
[])
18129 enum machine_mode mode
, vmode
;
18130 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18132 dest
= operands
[0];
18133 scratch
= operands
[1];
18136 nmask
= operands
[4];
18137 mask
= operands
[5];
18139 mode
= GET_MODE (dest
);
18140 vmode
= GET_MODE (mask
);
18142 if (rtx_equal_p (op0
, op1
))
18144 /* Shouldn't happen often (it's useless, obviously), but when it does
18145 we'd generate incorrect code if we continue below. */
18146 emit_move_insn (dest
, op0
);
18150 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18152 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18154 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18155 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18158 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18159 x
= gen_rtx_NOT (vmode
, dest
);
18160 x
= gen_rtx_AND (vmode
, x
, op0
);
18161 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18165 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18167 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18169 else /* alternative 2,4 */
18171 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18172 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18173 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18175 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18177 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18179 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18180 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18182 else /* alternative 3,4 */
18184 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18186 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18187 x
= gen_rtx_AND (vmode
, dest
, op0
);
18189 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18192 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18193 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18196 /* Return TRUE or FALSE depending on whether the first SET in INSN
18197 has source and destination with matching CC modes, and that the
18198 CC mode is at least as constrained as REQ_MODE. */
18201 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18204 enum machine_mode set_mode
;
18206 set
= PATTERN (insn
);
18207 if (GET_CODE (set
) == PARALLEL
)
18208 set
= XVECEXP (set
, 0, 0);
18209 gcc_assert (GET_CODE (set
) == SET
);
18210 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18212 set_mode
= GET_MODE (SET_DEST (set
));
18216 if (req_mode
!= CCNOmode
18217 && (req_mode
!= CCmode
18218 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18222 if (req_mode
== CCGCmode
)
18226 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18230 if (req_mode
== CCZmode
)
18240 if (set_mode
!= req_mode
)
18245 gcc_unreachable ();
18248 return GET_MODE (SET_SRC (set
)) == set_mode
;
18251 /* Generate insn patterns to do an integer compare of OPERANDS. */
18254 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18256 enum machine_mode cmpmode
;
18259 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18260 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18262 /* This is very simple, but making the interface the same as in the
18263 FP case makes the rest of the code easier. */
18264 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18265 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18267 /* Return the test that should be put into the flags user, i.e.
18268 the bcc, scc, or cmov instruction. */
18269 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18272 /* Figure out whether to use ordered or unordered fp comparisons.
18273 Return the appropriate mode to use. */
18276 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18278 /* ??? In order to make all comparisons reversible, we do all comparisons
18279 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18280 all forms trapping and nontrapping comparisons, we can make inequality
18281 comparisons trapping again, since it results in better code when using
18282 FCOM based compares. */
18283 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18287 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18289 enum machine_mode mode
= GET_MODE (op0
);
18291 if (SCALAR_FLOAT_MODE_P (mode
))
18293 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18294 return ix86_fp_compare_mode (code
);
18299 /* Only zero flag is needed. */
18300 case EQ
: /* ZF=0 */
18301 case NE
: /* ZF!=0 */
18303 /* Codes needing carry flag. */
18304 case GEU
: /* CF=0 */
18305 case LTU
: /* CF=1 */
18306 /* Detect overflow checks. They need just the carry flag. */
18307 if (GET_CODE (op0
) == PLUS
18308 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18312 case GTU
: /* CF=0 & ZF=0 */
18313 case LEU
: /* CF=1 | ZF=1 */
18314 /* Detect overflow checks. They need just the carry flag. */
18315 if (GET_CODE (op0
) == MINUS
18316 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18320 /* Codes possibly doable only with sign flag when
18321 comparing against zero. */
18322 case GE
: /* SF=OF or SF=0 */
18323 case LT
: /* SF<>OF or SF=1 */
18324 if (op1
== const0_rtx
)
18327 /* For other cases Carry flag is not required. */
18329 /* Codes doable only with sign flag when comparing
18330 against zero, but we miss jump instruction for it
18331 so we need to use relational tests against overflow
18332 that thus needs to be zero. */
18333 case GT
: /* ZF=0 & SF=OF */
18334 case LE
: /* ZF=1 | SF<>OF */
18335 if (op1
== const0_rtx
)
18339 /* strcmp pattern do (use flags) and combine may ask us for proper
18344 gcc_unreachable ();
18348 /* Return the fixed registers used for condition codes. */
18351 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18358 /* If two condition code modes are compatible, return a condition code
18359 mode which is compatible with both. Otherwise, return
18362 static enum machine_mode
18363 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18368 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18371 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18372 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18375 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18377 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18383 gcc_unreachable ();
18413 /* These are only compatible with themselves, which we already
18420 /* Return a comparison we can do and that it is equivalent to
18421 swap_condition (code) apart possibly from orderedness.
18422 But, never change orderedness if TARGET_IEEE_FP, returning
18423 UNKNOWN in that case if necessary. */
18425 static enum rtx_code
18426 ix86_fp_swap_condition (enum rtx_code code
)
18430 case GT
: /* GTU - CF=0 & ZF=0 */
18431 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18432 case GE
: /* GEU - CF=0 */
18433 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18434 case UNLT
: /* LTU - CF=1 */
18435 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18436 case UNLE
: /* LEU - CF=1 | ZF=1 */
18437 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18439 return swap_condition (code
);
18443 /* Return cost of comparison CODE using the best strategy for performance.
18444 All following functions do use number of instructions as a cost metrics.
18445 In future this should be tweaked to compute bytes for optimize_size and
18446 take into account performance of various instructions on various CPUs. */
18449 ix86_fp_comparison_cost (enum rtx_code code
)
18453 /* The cost of code using bit-twiddling on %ah. */
18470 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18474 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18477 gcc_unreachable ();
18480 switch (ix86_fp_comparison_strategy (code
))
18482 case IX86_FPCMP_COMI
:
18483 return arith_cost
> 4 ? 3 : 2;
18484 case IX86_FPCMP_SAHF
:
18485 return arith_cost
> 4 ? 4 : 3;
18491 /* Return strategy to use for floating-point. We assume that fcomi is always
18492 preferrable where available, since that is also true when looking at size
18493 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18495 enum ix86_fpcmp_strategy
18496 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18498 /* Do fcomi/sahf based test when profitable. */
18501 return IX86_FPCMP_COMI
;
18503 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18504 return IX86_FPCMP_SAHF
;
18506 return IX86_FPCMP_ARITH
;
18509 /* Swap, force into registers, or otherwise massage the two operands
18510 to a fp comparison. The operands are updated in place; the new
18511 comparison code is returned. */
18513 static enum rtx_code
18514 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18516 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18517 rtx op0
= *pop0
, op1
= *pop1
;
18518 enum machine_mode op_mode
= GET_MODE (op0
);
18519 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18521 /* All of the unordered compare instructions only work on registers.
18522 The same is true of the fcomi compare instructions. The XFmode
18523 compare instructions require registers except when comparing
18524 against zero or when converting operand 1 from fixed point to
18528 && (fpcmp_mode
== CCFPUmode
18529 || (op_mode
== XFmode
18530 && ! (standard_80387_constant_p (op0
) == 1
18531 || standard_80387_constant_p (op1
) == 1)
18532 && GET_CODE (op1
) != FLOAT
)
18533 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18535 op0
= force_reg (op_mode
, op0
);
18536 op1
= force_reg (op_mode
, op1
);
18540 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18541 things around if they appear profitable, otherwise force op0
18542 into a register. */
18544 if (standard_80387_constant_p (op0
) == 0
18546 && ! (standard_80387_constant_p (op1
) == 0
18549 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18550 if (new_code
!= UNKNOWN
)
18553 tmp
= op0
, op0
= op1
, op1
= tmp
;
18559 op0
= force_reg (op_mode
, op0
);
18561 if (CONSTANT_P (op1
))
18563 int tmp
= standard_80387_constant_p (op1
);
18565 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18569 op1
= force_reg (op_mode
, op1
);
18572 op1
= force_reg (op_mode
, op1
);
18576 /* Try to rearrange the comparison to make it cheaper. */
18577 if (ix86_fp_comparison_cost (code
)
18578 > ix86_fp_comparison_cost (swap_condition (code
))
18579 && (REG_P (op1
) || can_create_pseudo_p ()))
18582 tmp
= op0
, op0
= op1
, op1
= tmp
;
18583 code
= swap_condition (code
);
18585 op0
= force_reg (op_mode
, op0
);
18593 /* Convert comparison codes we use to represent FP comparison to integer
18594 code that will result in proper branch. Return UNKNOWN if no such code
18598 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18627 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18630 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18632 enum machine_mode fpcmp_mode
, intcmp_mode
;
18635 fpcmp_mode
= ix86_fp_compare_mode (code
);
18636 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18638 /* Do fcomi/sahf based test when profitable. */
18639 switch (ix86_fp_comparison_strategy (code
))
18641 case IX86_FPCMP_COMI
:
18642 intcmp_mode
= fpcmp_mode
;
18643 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18644 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18649 case IX86_FPCMP_SAHF
:
18650 intcmp_mode
= fpcmp_mode
;
18651 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18652 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18656 scratch
= gen_reg_rtx (HImode
);
18657 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18658 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18661 case IX86_FPCMP_ARITH
:
18662 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18663 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18664 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18666 scratch
= gen_reg_rtx (HImode
);
18667 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18669 /* In the unordered case, we have to check C2 for NaN's, which
18670 doesn't happen to work out to anything nice combination-wise.
18671 So do some bit twiddling on the value we've got in AH to come
18672 up with an appropriate set of condition codes. */
18674 intcmp_mode
= CCNOmode
;
18679 if (code
== GT
|| !TARGET_IEEE_FP
)
18681 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18686 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18687 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18688 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18689 intcmp_mode
= CCmode
;
18695 if (code
== LT
&& TARGET_IEEE_FP
)
18697 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18698 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18699 intcmp_mode
= CCmode
;
18704 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18710 if (code
== GE
|| !TARGET_IEEE_FP
)
18712 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18717 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18718 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18724 if (code
== LE
&& TARGET_IEEE_FP
)
18726 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18727 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18728 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18729 intcmp_mode
= CCmode
;
18734 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18740 if (code
== EQ
&& TARGET_IEEE_FP
)
18742 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18743 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18744 intcmp_mode
= CCmode
;
18749 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18755 if (code
== NE
&& TARGET_IEEE_FP
)
18757 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18758 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18764 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18770 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18774 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18779 gcc_unreachable ();
18787 /* Return the test that should be put into the flags user, i.e.
18788 the bcc, scc, or cmov instruction. */
18789 return gen_rtx_fmt_ee (code
, VOIDmode
,
18790 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18795 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18799 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18800 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18802 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18804 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18805 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18808 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18814 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18816 enum machine_mode mode
= GET_MODE (op0
);
18828 tmp
= ix86_expand_compare (code
, op0
, op1
);
18829 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18830 gen_rtx_LABEL_REF (VOIDmode
, label
),
18832 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18839 /* Expand DImode branch into multiple compare+branch. */
18841 rtx lo
[2], hi
[2], label2
;
18842 enum rtx_code code1
, code2
, code3
;
18843 enum machine_mode submode
;
18845 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18847 tmp
= op0
, op0
= op1
, op1
= tmp
;
18848 code
= swap_condition (code
);
18851 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18852 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18854 submode
= mode
== DImode
? SImode
: DImode
;
18856 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18857 avoid two branches. This costs one extra insn, so disable when
18858 optimizing for size. */
18860 if ((code
== EQ
|| code
== NE
)
18861 && (!optimize_insn_for_size_p ()
18862 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18867 if (hi
[1] != const0_rtx
)
18868 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18869 NULL_RTX
, 0, OPTAB_WIDEN
);
18872 if (lo
[1] != const0_rtx
)
18873 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18874 NULL_RTX
, 0, OPTAB_WIDEN
);
18876 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18877 NULL_RTX
, 0, OPTAB_WIDEN
);
18879 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18883 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18884 op1 is a constant and the low word is zero, then we can just
18885 examine the high word. Similarly for low word -1 and
18886 less-or-equal-than or greater-than. */
18888 if (CONST_INT_P (hi
[1]))
18891 case LT
: case LTU
: case GE
: case GEU
:
18892 if (lo
[1] == const0_rtx
)
18894 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18898 case LE
: case LEU
: case GT
: case GTU
:
18899 if (lo
[1] == constm1_rtx
)
18901 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18909 /* Otherwise, we need two or three jumps. */
18911 label2
= gen_label_rtx ();
18914 code2
= swap_condition (code
);
18915 code3
= unsigned_condition (code
);
18919 case LT
: case GT
: case LTU
: case GTU
:
18922 case LE
: code1
= LT
; code2
= GT
; break;
18923 case GE
: code1
= GT
; code2
= LT
; break;
18924 case LEU
: code1
= LTU
; code2
= GTU
; break;
18925 case GEU
: code1
= GTU
; code2
= LTU
; break;
18927 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18928 case NE
: code2
= UNKNOWN
; break;
18931 gcc_unreachable ();
18936 * if (hi(a) < hi(b)) goto true;
18937 * if (hi(a) > hi(b)) goto false;
18938 * if (lo(a) < lo(b)) goto true;
18942 if (code1
!= UNKNOWN
)
18943 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18944 if (code2
!= UNKNOWN
)
18945 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18947 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18949 if (code2
!= UNKNOWN
)
18950 emit_label (label2
);
18955 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
18960 /* Split branch based on floating point condition. */
18962 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18963 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
18968 if (target2
!= pc_rtx
)
18971 code
= reverse_condition_maybe_unordered (code
);
18976 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18979 /* Remove pushed operand from stack. */
18981 ix86_free_from_memory (GET_MODE (pushed
));
18983 i
= emit_jump_insn (gen_rtx_SET
18985 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18986 condition
, target1
, target2
)));
18987 if (split_branch_probability
>= 0)
18988 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
18992 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18996 gcc_assert (GET_MODE (dest
) == QImode
);
18998 ret
= ix86_expand_compare (code
, op0
, op1
);
18999 PUT_MODE (ret
, QImode
);
19000 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19003 /* Expand comparison setting or clearing carry flag. Return true when
19004 successful and set pop for the operation. */
19006 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19008 enum machine_mode mode
=
19009 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19011 /* Do not handle double-mode compares that go through special path. */
19012 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19015 if (SCALAR_FLOAT_MODE_P (mode
))
19017 rtx compare_op
, compare_seq
;
19019 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19021 /* Shortcut: following common codes never translate
19022 into carry flag compares. */
19023 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19024 || code
== ORDERED
|| code
== UNORDERED
)
19027 /* These comparisons require zero flag; swap operands so they won't. */
19028 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19029 && !TARGET_IEEE_FP
)
19034 code
= swap_condition (code
);
19037 /* Try to expand the comparison and verify that we end up with
19038 carry flag based comparison. This fails to be true only when
19039 we decide to expand comparison using arithmetic that is not
19040 too common scenario. */
19042 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19043 compare_seq
= get_insns ();
19046 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19047 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19048 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19050 code
= GET_CODE (compare_op
);
19052 if (code
!= LTU
&& code
!= GEU
)
19055 emit_insn (compare_seq
);
19060 if (!INTEGRAL_MODE_P (mode
))
19069 /* Convert a==0 into (unsigned)a<1. */
19072 if (op1
!= const0_rtx
)
19075 code
= (code
== EQ
? LTU
: GEU
);
19078 /* Convert a>b into b<a or a>=b-1. */
19081 if (CONST_INT_P (op1
))
19083 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19084 /* Bail out on overflow. We still can swap operands but that
19085 would force loading of the constant into register. */
19086 if (op1
== const0_rtx
19087 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19089 code
= (code
== GTU
? GEU
: LTU
);
19096 code
= (code
== GTU
? LTU
: GEU
);
19100 /* Convert a>=0 into (unsigned)a<0x80000000. */
19103 if (mode
== DImode
|| op1
!= const0_rtx
)
19105 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19106 code
= (code
== LT
? GEU
: LTU
);
19110 if (mode
== DImode
|| op1
!= constm1_rtx
)
19112 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19113 code
= (code
== LE
? GEU
: LTU
);
19119 /* Swapping operands may cause constant to appear as first operand. */
19120 if (!nonimmediate_operand (op0
, VOIDmode
))
19122 if (!can_create_pseudo_p ())
19124 op0
= force_reg (mode
, op0
);
19126 *pop
= ix86_expand_compare (code
, op0
, op1
);
19127 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19132 ix86_expand_int_movcc (rtx operands
[])
19134 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19135 rtx compare_seq
, compare_op
;
19136 enum machine_mode mode
= GET_MODE (operands
[0]);
19137 bool sign_bit_compare_p
= false;
19138 rtx op0
= XEXP (operands
[1], 0);
19139 rtx op1
= XEXP (operands
[1], 1);
19141 if (GET_MODE (op0
) == TImode
19142 || (GET_MODE (op0
) == DImode
19147 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19148 compare_seq
= get_insns ();
19151 compare_code
= GET_CODE (compare_op
);
19153 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19154 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19155 sign_bit_compare_p
= true;
19157 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19158 HImode insns, we'd be swallowed in word prefix ops. */
19160 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19161 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19162 && CONST_INT_P (operands
[2])
19163 && CONST_INT_P (operands
[3]))
19165 rtx out
= operands
[0];
19166 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19167 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19168 HOST_WIDE_INT diff
;
19171 /* Sign bit compares are better done using shifts than we do by using
19173 if (sign_bit_compare_p
19174 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19176 /* Detect overlap between destination and compare sources. */
19179 if (!sign_bit_compare_p
)
19182 bool fpcmp
= false;
19184 compare_code
= GET_CODE (compare_op
);
19186 flags
= XEXP (compare_op
, 0);
19188 if (GET_MODE (flags
) == CCFPmode
19189 || GET_MODE (flags
) == CCFPUmode
)
19193 = ix86_fp_compare_code_to_integer (compare_code
);
19196 /* To simplify rest of code, restrict to the GEU case. */
19197 if (compare_code
== LTU
)
19199 HOST_WIDE_INT tmp
= ct
;
19202 compare_code
= reverse_condition (compare_code
);
19203 code
= reverse_condition (code
);
19208 PUT_CODE (compare_op
,
19209 reverse_condition_maybe_unordered
19210 (GET_CODE (compare_op
)));
19212 PUT_CODE (compare_op
,
19213 reverse_condition (GET_CODE (compare_op
)));
19217 if (reg_overlap_mentioned_p (out
, op0
)
19218 || reg_overlap_mentioned_p (out
, op1
))
19219 tmp
= gen_reg_rtx (mode
);
19221 if (mode
== DImode
)
19222 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19224 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19225 flags
, compare_op
));
19229 if (code
== GT
|| code
== GE
)
19230 code
= reverse_condition (code
);
19233 HOST_WIDE_INT tmp
= ct
;
19238 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19251 tmp
= expand_simple_binop (mode
, PLUS
,
19253 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19264 tmp
= expand_simple_binop (mode
, IOR
,
19266 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19268 else if (diff
== -1 && ct
)
19278 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19280 tmp
= expand_simple_binop (mode
, PLUS
,
19281 copy_rtx (tmp
), GEN_INT (cf
),
19282 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19290 * andl cf - ct, dest
19300 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19303 tmp
= expand_simple_binop (mode
, AND
,
19305 gen_int_mode (cf
- ct
, mode
),
19306 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19308 tmp
= expand_simple_binop (mode
, PLUS
,
19309 copy_rtx (tmp
), GEN_INT (ct
),
19310 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19313 if (!rtx_equal_p (tmp
, out
))
19314 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19321 enum machine_mode cmp_mode
= GET_MODE (op0
);
19324 tmp
= ct
, ct
= cf
, cf
= tmp
;
19327 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19329 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19331 /* We may be reversing unordered compare to normal compare, that
19332 is not valid in general (we may convert non-trapping condition
19333 to trapping one), however on i386 we currently emit all
19334 comparisons unordered. */
19335 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19336 code
= reverse_condition_maybe_unordered (code
);
19340 compare_code
= reverse_condition (compare_code
);
19341 code
= reverse_condition (code
);
19345 compare_code
= UNKNOWN
;
19346 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19347 && CONST_INT_P (op1
))
19349 if (op1
== const0_rtx
19350 && (code
== LT
|| code
== GE
))
19351 compare_code
= code
;
19352 else if (op1
== constm1_rtx
)
19356 else if (code
== GT
)
19361 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19362 if (compare_code
!= UNKNOWN
19363 && GET_MODE (op0
) == GET_MODE (out
)
19364 && (cf
== -1 || ct
== -1))
19366 /* If lea code below could be used, only optimize
19367 if it results in a 2 insn sequence. */
19369 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19370 || diff
== 3 || diff
== 5 || diff
== 9)
19371 || (compare_code
== LT
&& ct
== -1)
19372 || (compare_code
== GE
&& cf
== -1))
19375 * notl op1 (if necessary)
19383 code
= reverse_condition (code
);
19386 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19388 out
= expand_simple_binop (mode
, IOR
,
19390 out
, 1, OPTAB_DIRECT
);
19391 if (out
!= operands
[0])
19392 emit_move_insn (operands
[0], out
);
19399 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19400 || diff
== 3 || diff
== 5 || diff
== 9)
19401 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19403 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19409 * lea cf(dest*(ct-cf)),dest
19413 * This also catches the degenerate setcc-only case.
19419 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19422 /* On x86_64 the lea instruction operates on Pmode, so we need
19423 to get arithmetics done in proper mode to match. */
19425 tmp
= copy_rtx (out
);
19429 out1
= copy_rtx (out
);
19430 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19434 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19440 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19443 if (!rtx_equal_p (tmp
, out
))
19446 out
= force_operand (tmp
, copy_rtx (out
));
19448 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19450 if (!rtx_equal_p (out
, operands
[0]))
19451 emit_move_insn (operands
[0], copy_rtx (out
));
19457 * General case: Jumpful:
19458 * xorl dest,dest cmpl op1, op2
19459 * cmpl op1, op2 movl ct, dest
19460 * setcc dest jcc 1f
19461 * decl dest movl cf, dest
19462 * andl (cf-ct),dest 1:
19465 * Size 20. Size 14.
19467 * This is reasonably steep, but branch mispredict costs are
19468 * high on modern cpus, so consider failing only if optimizing
19472 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19473 && BRANCH_COST (optimize_insn_for_speed_p (),
19478 enum machine_mode cmp_mode
= GET_MODE (op0
);
19483 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19485 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19487 /* We may be reversing unordered compare to normal compare,
19488 that is not valid in general (we may convert non-trapping
19489 condition to trapping one), however on i386 we currently
19490 emit all comparisons unordered. */
19491 code
= reverse_condition_maybe_unordered (code
);
19495 code
= reverse_condition (code
);
19496 if (compare_code
!= UNKNOWN
)
19497 compare_code
= reverse_condition (compare_code
);
19501 if (compare_code
!= UNKNOWN
)
19503 /* notl op1 (if needed)
19508 For x < 0 (resp. x <= -1) there will be no notl,
19509 so if possible swap the constants to get rid of the
19511 True/false will be -1/0 while code below (store flag
19512 followed by decrement) is 0/-1, so the constants need
19513 to be exchanged once more. */
19515 if (compare_code
== GE
|| !cf
)
19517 code
= reverse_condition (code
);
19522 HOST_WIDE_INT tmp
= cf
;
19527 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19531 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19533 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19535 copy_rtx (out
), 1, OPTAB_DIRECT
);
19538 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19539 gen_int_mode (cf
- ct
, mode
),
19540 copy_rtx (out
), 1, OPTAB_DIRECT
);
19542 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19543 copy_rtx (out
), 1, OPTAB_DIRECT
);
19544 if (!rtx_equal_p (out
, operands
[0]))
19545 emit_move_insn (operands
[0], copy_rtx (out
));
19551 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19553 /* Try a few things more with specific constants and a variable. */
19556 rtx var
, orig_out
, out
, tmp
;
19558 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19561 /* If one of the two operands is an interesting constant, load a
19562 constant with the above and mask it in with a logical operation. */
19564 if (CONST_INT_P (operands
[2]))
19567 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19568 operands
[3] = constm1_rtx
, op
= and_optab
;
19569 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19570 operands
[3] = const0_rtx
, op
= ior_optab
;
19574 else if (CONST_INT_P (operands
[3]))
19577 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19578 operands
[2] = constm1_rtx
, op
= and_optab
;
19579 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19580 operands
[2] = const0_rtx
, op
= ior_optab
;
19587 orig_out
= operands
[0];
19588 tmp
= gen_reg_rtx (mode
);
19591 /* Recurse to get the constant loaded. */
19592 if (ix86_expand_int_movcc (operands
) == 0)
19595 /* Mask in the interesting variable. */
19596 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19598 if (!rtx_equal_p (out
, orig_out
))
19599 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19605 * For comparison with above,
19615 if (! nonimmediate_operand (operands
[2], mode
))
19616 operands
[2] = force_reg (mode
, operands
[2]);
19617 if (! nonimmediate_operand (operands
[3], mode
))
19618 operands
[3] = force_reg (mode
, operands
[3]);
19620 if (! register_operand (operands
[2], VOIDmode
)
19622 || ! register_operand (operands
[3], VOIDmode
)))
19623 operands
[2] = force_reg (mode
, operands
[2]);
19626 && ! register_operand (operands
[3], VOIDmode
))
19627 operands
[3] = force_reg (mode
, operands
[3]);
19629 emit_insn (compare_seq
);
19630 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19631 gen_rtx_IF_THEN_ELSE (mode
,
19632 compare_op
, operands
[2],
/* NOTE(review): this chunk is a lossy extraction of GCC's i386 backend.
   Statement lines are split at identifier boundaries, original-file line
   numbers (e.g. "19645") are fused into the text, and runs of original
   lines are missing wherever the embedded numbering jumps (e.g.
   19648-19654 here, which held the function's opening brace and the
   start of its switch on CODE).  Code below is left byte-identical;
   only comments are added.  */
19637 /* Swap, force into registers, or otherwise massage the two operands
19638 to an sse comparison with a mask result. Thus we differ a bit from
19639 ix86_prepare_fp_compare_args which expects to produce a flags result.
19641 The DEST operand exists to help determine whether to commute commutative
19642 operators. The POP0/POP1 operands are updated in place. The new
19643 comparison code is returned, or UNKNOWN if not implementable. */
19645 static enum rtx_code
19646 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19647 rtx
*pop0
, rtx
*pop1
)
/* The body is a switch over CODE; the case labels themselves were lost
   in extraction, only the per-case comments and a few statements
   survive below.  */
19655 /* AVX supports all the needed comparisons. */
19658 /* We have no LTGT as an operator. We could implement it with
19659 NE & ORDERED, but this requires an extra temporary. It's
19660 not clear that it's worth it. */
19667 /* These are supported directly. */
19674 /* AVX has 3 operand comparisons, no need to swap anything. */
19677 /* For commutative operators, try to canonicalize the destination
19678 operand to be first in the comparison - this helps reload to
19679 avoid extra moves. */
19680 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19688 /* These are not supported directly before AVX, and furthermore
19689 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19690 comparison operands to transform into something that is
/* NOTE(review): lines 19691-19694 (end of comment, operand swap via
   *pop0/*pop1) are missing from this extraction.  */
19695 code
= swap_condition (code
);
19699 gcc_unreachable ();
/* NOTE(review): lossy extraction — the "static bool" return-type line
   (original line 19712) and several early-return branches are missing
   wherever the embedded numbering jumps.  Code left byte-identical;
   comments only added.  */
19705 /* Detect conditional moves that exactly match min/max operational
19706 semantics. Note that this is IEEE safe, as long as we don't
19707 interchange the operands.
19709 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19710 and TRUE if the operation is successful and instructions are emitted. */
19713 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
,
19714 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19716 enum machine_mode mode
;
/* NOTE(review): lines 19717-19721 missing — presumably the is_min
   determination for the LT case; verify against the original file.  */
19722 else if (code
== UNGE
)
/* UNGE is handled by swapping the arms so the operation becomes a
   min/max of the swapped operands.  */
19725 if_true
= if_false
;
/* Require the cmov arms to exactly mirror the compare operands —
   otherwise this is not a min/max and the caller falls back.  */
19731 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19733 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19738 mode
= GET_MODE (dest
);
19740 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19741 but MODE may be a vector mode and thus not appropriate. */
19742 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
/* Strict-IEEE path: emit an UNSPEC so later passes cannot commute
   the operands (which would change NaN/signed-zero behavior).  */
19744 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19747 if_true
= force_reg (mode
, if_true
);
19748 v
= gen_rtvec (2, if_true
, if_false
);
19749 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
/* Relaxed-math path: a plain SMIN/SMAX rtx is sufficient.  */
19753 code
= is_min
? SMIN
: SMAX
;
19754 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19757 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
/* NOTE(review): lossy extraction — the function's return type line and
   a few statements (gaps at 19768-19770, 19774-19775, 19785-19786,
   including the return of DEST) are missing.  Code left byte-identical;
   comments only added.  */
19761 /* Expand an sse vector comparison. Return the register with the result. */
19764 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19765 rtx op_true
, rtx op_false
)
19767 enum machine_mode mode
= GET_MODE (dest
);
19768 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
/* Force compare operands into forms the comparison patterns accept.  */
19771 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19772 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19773 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
/* Use a fresh pseudo when DEST would clash with the select arms —
   the mask must not clobber values still needed for the blend.  */
19776 || reg_overlap_mentioned_p (dest
, op_true
)
19777 || reg_overlap_mentioned_p (dest
, op_false
))
19778 dest
= gen_reg_rtx (mode
);
19780 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19781 if (cmp_mode
!= mode
)
/* Comparison computed in a different mode than the destination:
   materialize it, then convert into DEST.  */
19783 x
= force_reg (cmp_mode
, x
);
19784 convert_move (dest
, x
, false);
19787 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* NOTE(review): lossy extraction — this function is an if/else ladder
   of special cases followed by a switch over the destination mode; the
   switch statement, its case labels, and several returns are missing
   wherever the embedded numbering jumps.  Code left byte-identical;
   comments only added.  */
19792 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19793 operations. This is used for both scalar and vector conditional moves. */
19796 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19798 enum machine_mode mode
= GET_MODE (dest
);
/* Special case: CMP ? -1 : 0 is just the mask itself.  */
19801 if (vector_all_ones_operand (op_true
, mode
)
19802 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19804 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
/* CMP ? x : 0  ==>  cmp & x.  */
19806 else if (op_false
== CONST0_RTX (mode
))
19808 op_true
= force_reg (mode
, op_true
);
19809 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19810 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* CMP ? 0 : x  ==>  ~cmp & x.  */
19812 else if (op_true
== CONST0_RTX (mode
))
19814 op_false
= force_reg (mode
, op_false
);
19815 x
= gen_rtx_NOT (mode
, cmp
);
19816 x
= gen_rtx_AND (mode
, x
, op_false
);
19817 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Integer CMP ? -1 : x  ==>  cmp | x.  */
19819 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19821 op_false
= force_reg (mode
, op_false
);
19822 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19823 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* XOP has a true vector cmov (VPCMOV): emit IF_THEN_ELSE directly.  */
19825 else if (TARGET_XOP
)
19827 op_true
= force_reg (mode
, op_true
);
19829 if (!nonimmediate_operand (op_false
, mode
))
19830 op_false
= force_reg (mode
, op_false
);
19832 emit_insn (gen_rtx_SET (mode
, dest
,
19833 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
/* General case: try a BLENDV-family instruction chosen by mode
   (switch and case labels lost in extraction), else fall through to
   the AND/ANDNOT/OR emulation at the bottom.  */
19839 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19841 if (!nonimmediate_operand (op_true
, mode
))
19842 op_true
= force_reg (mode
, op_true
);
19844 op_false
= force_reg (mode
, op_false
);
19850 gen
= gen_sse4_1_blendvps
;
19854 gen
= gen_sse4_1_blendvpd
;
/* 128-bit integer modes: use PBLENDVB on a V16QI view.  */
19862 gen
= gen_sse4_1_pblendvb
;
19863 dest
= gen_lowpart (V16QImode
, dest
);
19864 op_false
= gen_lowpart (V16QImode
, op_false
);
19865 op_true
= gen_lowpart (V16QImode
, op_true
);
19866 cmp
= gen_lowpart (V16QImode
, cmp
);
19871 gen
= gen_avx_blendvps256
;
19875 gen
= gen_avx_blendvpd256
;
/* 256-bit integer modes: VPBLENDVB on a V32QI view.  */
19883 gen
= gen_avx2_pblendvb
;
19884 dest
= gen_lowpart (V32QImode
, dest
);
19885 op_false
= gen_lowpart (V32QImode
, op_false
);
19886 op_true
= gen_lowpart (V32QImode
, op_true
);
19887 cmp
= gen_lowpart (V32QImode
, cmp
);
19895 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
/* Fallback: dest = (op_true & cmp) | (op_false & ~cmp).  */
19898 op_true
= force_reg (mode
, op_true
);
19900 t2
= gen_reg_rtx (mode
);
19902 t3
= gen_reg_rtx (mode
);
19906 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19907 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19909 x
= gen_rtx_NOT (mode
, cmp
);
19910 x
= gen_rtx_AND (mode
, x
, op_false
);
19911 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19913 x
= gen_rtx_IOR (mode
, t3
, t2
);
19914 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* NOTE(review): lossy extraction — return-type line, `return` statements
   and some branch bodies are missing (gaps e.g. 19940-19942, 19945-19946,
   19954-19956, 19959-19961).  Code left byte-identical; comments only
   added.  */
19919 /* Expand a floating-point conditional move. Return true if successful. */
19922 ix86_expand_fp_movcc (rtx operands
[])
19924 enum machine_mode mode
= GET_MODE (operands
[0]);
19925 enum rtx_code code
= GET_CODE (operands
[1]);
19926 rtx tmp
, compare_op
;
19927 rtx op0
= XEXP (operands
[1], 0);
19928 rtx op1
= XEXP (operands
[1], 1);
/* SSE scalar-math path: lower to an SSE compare mask + blend.  */
19930 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19932 enum machine_mode cmode
;
19934 /* Since we've no cmove for sse registers, don't force bad register
19935 allocation just to gain access to it. Deny movcc when the
19936 comparison mode doesn't match the move mode. */
19937 cmode
= GET_MODE (op0
);
19938 if (cmode
== VOIDmode
)
19939 cmode
= GET_MODE (op1
);
19943 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
19944 if (code
== UNKNOWN
)
/* First try the min/max shortcut, else a full compare + movcc.  */
19947 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
19948 operands
[2], operands
[3]))
19951 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
19952 operands
[2], operands
[3]);
19953 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
/* x87 FCMOV path from here down.  */
19957 if (GET_MODE (op0
) == TImode
19958 || (GET_MODE (op0
) == DImode
19962 /* The floating point conditional move instructions don't directly
19963 support conditions resulting from a signed integer comparison. */
19965 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19966 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
/* Materialize the condition into a QImode flag and retest it with
   NE, which FCMOV does support.  */
19968 tmp
= gen_reg_rtx (QImode
);
19969 ix86_expand_setcc (tmp
, code
, op0
, op1
);
19971 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
19974 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19975 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
19976 operands
[2], operands
[3])));
/* NOTE(review): lossy extraction — return-type line, `case` labels of the
   switch, local declarations (temp/cmp) and `return` statements are
   missing at the numbering gaps.  Code left byte-identical; comments
   only added.  */
19981 /* Expand a floating-point vector conditional move; a vcond operation
19982 rather than a movcc operation. */
19985 ix86_expand_fp_vcond (rtx operands
[])
19987 enum rtx_code code
= GET_CODE (operands
[3]);
19990 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
19991 &operands
[4], &operands
[5]);
19992 if (code
== UNKNOWN
)
/* Unsupported code: synthesize LTGT/UNEQ from two compares combined
   with a logical op (the case labels were lost in extraction).  */
19995 switch (GET_CODE (operands
[3]))
19998 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
19999 operands
[5], operands
[0], operands
[0]);
20000 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20001 operands
[5], operands
[1], operands
[2]);
20005 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20006 operands
[5], operands
[0], operands
[0]);
20007 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20008 operands
[5], operands
[1], operands
[2]);
20012 gcc_unreachable ();
/* Combine the two partial masks into the final condition mask.  */
20014 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20016 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
/* Directly supported code: try min/max first, else compare + blend.  */
20020 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20021 operands
[5], operands
[1], operands
[2]))
20024 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20025 operands
[1], operands
[2]);
20026 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
/* NOTE(review): lossy extraction — the return-type line, several `case`
   labels, the canonicalization switch over CODE (gap 20090-20100), the
   unsigned-compare mode switch (gap 20149-20158) and various returns are
   missing at the numbering gaps.  Code left byte-identical; comments
   only added.  */
20030 /* Expand a signed/unsigned integral vector conditional move. */
20033 ix86_expand_int_vcond (rtx operands
[])
20035 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20036 enum machine_mode mode
= GET_MODE (operands
[4]);
20037 enum rtx_code code
= GET_CODE (operands
[3]);
20038 bool negate
= false;
20041 cop0
= operands
[4];
20042 cop1
= operands
[5];
20044 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20045 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20046 if ((code
== LT
|| code
== GE
)
20047 && data_mode
== mode
20048 && cop1
== CONST0_RTX (mode
)
20049 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20050 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20051 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20052 && (GET_MODE_SIZE (data_mode
) == 16
20053 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20055 rtx negop
= operands
[2 - (code
== LT
)];
20056 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
/* x < 0 ? 1 : 0  ==>  logical shift right by (bits-1).  */
20057 if (negop
== CONST1_RTX (data_mode
))
20059 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20060 operands
[0], 1, OPTAB_DIRECT
);
20061 if (res
!= operands
[0])
20062 emit_move_insn (operands
[0], res
);
/* x < 0 ? -1 : 0  ==>  arithmetic shift right (sign-fill); no DImode
   arithmetic shift exists pre-AVX512, hence the DImode exclusion.  */
20065 else if (GET_MODE_INNER (data_mode
) != DImode
20066 && vector_all_ones_operand (negop
, data_mode
))
20068 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20069 operands
[0], 0, OPTAB_DIRECT
);
20070 if (res
!= operands
[0])
20071 emit_move_insn (operands
[0], res
);
/* General lowering: legitimize all operands first.  */
20076 if (!nonimmediate_operand (cop1
, mode
))
20077 cop1
= force_reg (mode
, cop1
);
20078 if (!general_operand (operands
[1], data_mode
))
20079 operands
[1] = force_reg (data_mode
, operands
[1]);
20080 if (!general_operand (operands
[2], data_mode
))
20081 operands
[2] = force_reg (data_mode
, operands
[2]);
20083 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20085 && (mode
== V16QImode
|| mode
== V8HImode
20086 || mode
== V4SImode
|| mode
== V2DImode
))
20090 /* Canonicalize the comparison to EQ, GT, GTU. */
/* (Switch over CODE lost in extraction: NE/LE/LEU reverse; LT/LTU
   swap operands; below are the surviving statements.)  */
20101 code
= reverse_condition (code
);
20107 code
= reverse_condition (code
);
20113 code
= swap_condition (code
);
20114 x
= cop0
, cop0
= cop1
, cop1
= x
;
20118 gcc_unreachable ();
20121 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20122 if (mode
== V2DImode
)
20127 /* SSE4.1 supports EQ. */
20128 if (!TARGET_SSE4_1
)
20134 /* SSE4.2 supports GT/GTU. */
20135 if (!TARGET_SSE4_2
)
20140 gcc_unreachable ();
20144 /* Unsigned parallel compare is not supported by the hardware.
20145 Play some tricks to turn this into a signed comparison
20149 cop0
= force_reg (mode
, cop0
)
;
/* Trick 1 (used for the wider element modes): bias both operands by
   the sign-bit constant so unsigned GT becomes signed GT.  */
20159 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20163 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20164 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20165 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20166 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20168 gcc_unreachable ();
20170 /* Subtract (-(INT MAX) - 1) from both operands to make
20172 mask
= ix86_build_signbit_mask (mode
, true, false);
20173 t1
= gen_reg_rtx (mode
);
20174 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20176 t2
= gen_reg_rtx (mode
);
20177 emit_insn (gen_sub3 (t2
, cop1
, mask
));
/* Trick 2 (QI/HI elements): a > b unsigned iff us_minus(a,b) != 0.  */
20189 /* Perform a parallel unsigned saturating subtraction. */
20190 x
= gen_reg_rtx (mode
);
20191 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20192 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20195 cop1
= CONST0_RTX (mode
);
20201 gcc_unreachable ();
20206 /* Allow the comparison to be done in one mode, but the movcc to
20207 happen in another mode. */
20208 if (data_mode
== mode
)
20210 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20211 operands
[1+negate
], operands
[2-negate
]);
/* Modes differ: compare in MODE, then reinterpret as DATA_MODE.
   NEGATE selects swapped arms when the condition was inverted.  */
20215 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20216 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20218 operands
[1+negate
], operands
[2-negate
]);
20219 x
= gen_lowpart (data_mode
, x
);
20222 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20223 operands
[2-negate
]);
/* NOTE(review): lossy extraction — the return-type line, local
   declarations (i, e, w, x, xops), many braces/else lines and the
   `merge_two:` label region are missing at the numbering gaps.  Code
   left byte-identical; comments only added.  */
20227 /* Expand a variable vector permutation. */
20230 ix86_expand_vec_perm (rtx operands
[])
20232 rtx target
= operands
[0];
20233 rtx op0
= operands
[1];
20234 rtx op1
= operands
[2];
20235 rtx mask
= operands
[3];
20236 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20237 enum machine_mode mode
= GET_MODE (op0
);
20238 enum machine_mode maskmode
= GET_MODE (mask
);
20240 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20242 /* Number of elements in the vector. */
20243 w
= GET_MODE_NUNITS (mode
);
20244 e
= GET_MODE_UNIT_SIZE (mode
);
20245 gcc_assert (w
<= 32);
/* AVX2 section: rewrite modes with no variable-shuffle insn in terms
   of VPERMD/VPSHUFB-capable modes.  */
20249 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20251 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20252 an constant shuffle operand. With a tiny bit of effort we can
20253 use VPERMD instead. A re-interpretation stall for V4DFmode is
20254 unfortunate but there's no avoiding it.
20255 Similarly for V16HImode we don't have instructions for variable
20256 shuffling, while for V32QImode we can use after preparing suitable
20257 masks vpshufb; vpshufb; vpermq; vpor. */
20259 if (mode
== V16HImode
)
20261 maskmode
= mode
= V32QImode
;
20267 maskmode
= mode
= V8SImode
;
20271 t1
= gen_reg_rtx (maskmode
);
20273 /* Replicate the low bits of the V4DImode mask into V8SImode:
20275 t1 = { A A B B C C D D }. */
20276 for (i
= 0; i
< w
/ 2; ++i
)
20277 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20278 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20279 vt
= force_reg (maskmode
, vt
);
20280 mask
= gen_lowpart (maskmode
, mask
);
20281 if (maskmode
== V8SImode
)
20282 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20284 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20286 /* Multiply the shuffle indicies by two. */
20287 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20290 /* Add one to the odd shuffle indicies:
20291 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20292 for (i
= 0; i
< w
/ 2; ++i
)
20294 vec
[i
* 2] = const0_rtx
;
20295 vec
[i
* 2 + 1] = const1_rtx
;
20297 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20298 vt
= force_const_mem (maskmode
, vt
);
20299 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20302 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20303 operands
[3] = mask
= t1
;
20304 target
= gen_lowpart (mode
, target
);
20305 op0
= gen_lowpart (mode
, op0
);
20306 op1
= gen_lowpart (mode
, op1
);
/* V8SImode with AVX2: VPERMD handles it directly.  */
20312 /* The VPERMD and VPERMPS instructions already properly ignore
20313 the high bits of the shuffle elements. No need for us to
20314 perform an AND ourselves. */
20315 if (one_operand_shuffle
)
20316 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
))
;
/* Two-operand case: permute both, then merge (merge path lost in
   extraction).  */
20319 t1
= gen_reg_rtx (V8SImode
);
20320 t2
= gen_reg_rtx (V8SImode
);
20321 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20322 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
/* V8SFmode analog using VPERMPS.  */
20328 mask
= gen_lowpart (V8SFmode
, mask
);
20329 if (one_operand_shuffle
)
20330 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20333 t1
= gen_reg_rtx (V8SFmode
);
20334 t2
= gen_reg_rtx (V8SFmode
);
20335 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20336 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
/* 128-bit V4SI/V4SF widened to 256 bits so VPERMD/VPERMPS can do the
   whole two-operand shuffle in one go.  */
20342 /* By combining the two 128-bit input vectors into one 256-bit
20343 input vector, we can use VPERMD and VPERMPS for the full
20344 two-operand shuffle. */
20345 t1
= gen_reg_rtx (V8SImode
);
20346 t2
= gen_reg_rtx (V8SImode
);
20347 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20348 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20349 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20350 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20354 t1
= gen_reg_rtx (V8SFmode
);
20355 t2
= gen_reg_rtx (V8SImode
);
20356 mask
= gen_lowpart (V4SImode
, mask
);
20357 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20358 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20359 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20360 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
/* V32QImode: no cross-lane byte shuffle; build two lane-corrected
   VPSHUFB masks and combine (sequence documented below).  */
20364 t1
= gen_reg_rtx (V32QImode
);
20365 t2
= gen_reg_rtx (V32QImode
);
20366 t3
= gen_reg_rtx (V32QImode
);
20367 vt2
= GEN_INT (128);
20368 for (i
= 0; i
< 32; i
++)
20370 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20371 vt
= force_reg (V32QImode
, vt
);
20372 for (i
= 0; i
< 32; i
++)
20373 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20374 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20375 vt2
= force_reg (V32QImode
, vt2
);
20376 /* From mask create two adjusted masks, which contain the same
20377 bits as mask in the low 7 bits of each vector element.
20378 The first mask will have the most significant bit clear
20379 if it requests element from the same 128-bit lane
20380 and MSB set if it requests element from the other 128-bit lane.
20381 The second mask will have the opposite values of the MSB,
20382 and additionally will have its 128-bit lanes swapped.
20383 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20384 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20385 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20386 stands for other 12 bytes. */
20387 /* The bit whether element is from the same lane or the other
20388 lane is bit 4, so shift it up by 3 to the MSB position. */
20389 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20390 gen_lowpart (V4DImode
, mask
),
20392 /* Clear MSB bits from the mask just in case it had them set. */
20393 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20394 /* After this t1 will have MSB set for elements from other lane. */
20395 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20396 /* Clear bits other than MSB. */
20397 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20398 /* Or in the lower bits from mask into t3. */
20399 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20400 /* And invert MSB bits in t1, so MSB is set for elements from the same
20402 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20403 /* Swap 128-bit lanes in t3. */
20404 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20405 gen_lowpart (V4DImode
, t3
),
20406 const2_rtx
, GEN_INT (3),
20407 const0_rtx
, const1_rtx
));
20408 /* And or in the lower bits from mask into t1. */
20409 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20410 if (one_operand_shuffle
)
20412 /* Each of these shuffles will put 0s in places where
20413 element from the other 128-bit lane is needed, otherwise
20414 will shuffle in the requested value. */
20415 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20416 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20417 /* For t3 the 128-bit lanes are swapped again. */
20418 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20419 gen_lowpart (V4DImode
, t3
),
20420 const2_rtx
, GEN_INT (3),
20421 const0_rtx
, const1_rtx
));
20422 /* And oring both together leads to the result. */
20423 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20427 t4
= gen_reg_rtx (V32QImode
);
20428 /* Similarly to the above one_operand_shuffle code,
20429 just for repeated twice for each operand. merge_two:
20430 code will merge the two results together. */
20431 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20432 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20433 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20434 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20435 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20436 gen_lowpart (V4DImode
, t4
),
20437 const2_rtx
, GEN_INT (3),
20438 const0_rtx
, const1_rtx
));
20439 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20440 gen_lowpart (V4DImode
, t3
),
20441 const2_rtx
, GEN_INT (3),
20442 const0_rtx
, const1_rtx
));
20443 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20444 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
/* 128-bit path (SSSE3 PSHUFB / XOP VPPERM) from here down.  */
20450 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20457 /* The XOP VPPERM insn supports three inputs. By ignoring the
20458 one_operand_shuffle special case, we avoid creating another
20459 set of constant vectors in memory. */
20460 one_operand_shuffle
= false;
20462 /* mask = mask & {2*w-1, ...} */
20463 vt
= GEN_INT (2*w
- 1);
20467 /* mask = mask & {w-1, ...} */
20468 vt
= GEN_INT (w
- 1);
20471 for (i
= 0; i
< w
; i
++)
20473 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20474 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20475 NULL_RTX
, 0, OPTAB_DIRECT
);
20477 /* For non-QImode operations, convert the word permutation control
20478 into a byte permutation control. */
20479 if (mode
!= V16QImode
)
20481 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20482 GEN_INT (exact_log2 (e
)),
20483 NULL_RTX
, 0, OPTAB_DIRECT
);
20485 /* Convert mask to vector of chars. */
20486 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20488 /* Replicate each of the input bytes into byte positions:
20489 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20490 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20491 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20492 for (i
= 0; i
< 16; ++i
)
20493 vec
[i
] = GEN_INT (i
/e
* e
);
20494 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20495 vt
= force_const_mem (V16QImode
, vt
);
20497 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20499 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20501 /* Convert it into the byte positions by doing
20502 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20503 for (i
= 0; i
< 16; ++i
)
20504 vec
[i
] = GEN_INT (i
% e
);
20505 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20506 vt
= force_const_mem (V16QImode
, vt
);
20507 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20510 /* The actual shuffle operations all operate on V16QImode. */
20511 op0
= gen_lowpart (V16QImode
, op0
);
20512 op1
= gen_lowpart (V16QImode
, op1
);
20513 target
= gen_lowpart (V16QImode
, target
);
20517 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20519 else if (one_operand_shuffle
)
20521 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20528 /* Shuffle the two input vectors independently. */
20529 t1
= gen_reg_rtx (V16QImode
);
20530 t2
= gen_reg_rtx (V16QImode
);
20531 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20532 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20535 /* Then merge them together. The key is whether any given control
20536 element contained a bit set that indicates the second word. */
20537 mask
= operands
[3];
20539 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20541 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20542 more shuffle to convert the V2DI input mask into a V4SI
20543 input mask. At which point the masking that expand_int_vcond
20544 will work as desired. */
20545 rtx t3
= gen_reg_rtx (V4SImode
);
20546 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20547 const0_rtx
, const0_rtx
,
20548 const2_rtx
, const2_rtx
));
20550 maskmode
= V4SImode
;
/* Build select-bit mask and delegate the final blend to an integer
   vcond: (mask & select_bit) == select_bit picks t2 over t1.  */
20554 for (i
= 0; i
< w
; i
++)
20556 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20557 vt
= force_reg (maskmode
, vt
);
20558 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20559 NULL_RTX
, 0, OPTAB_DIRECT
);
20561 xops
[0] = gen_lowpart (mode
, operands
[0]);
20562 xops
[1] = gen_lowpart (mode
, t2
);
20563 xops
[2] = gen_lowpart (mode
, t1
);
20564 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20567 ok
= ix86_expand_int_vcond (xops
);
20572 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20573 true if we should do zero extension, else sign extension. HIGH_P is
20574 true if we want the N/2 high elements, else the low elements. */
20577 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20579 enum machine_mode imode
= GET_MODE (src
);
20584 rtx (*unpack
)(rtx
, rtx
);
20585 rtx (*extract
)(rtx
, rtx
) = NULL
;
20586 enum machine_mode halfmode
= BLKmode
;
20592 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20594 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20595 halfmode
= V16QImode
;
20597 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20601 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20603 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20604 halfmode
= V8HImode
;
20606 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20610 unpack
= gen_avx2_zero_extendv4siv4di2
;
20612 unpack
= gen_avx2_sign_extendv4siv4di2
;
20613 halfmode
= V4SImode
;
20615 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20619 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20621 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20625 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20627 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20631 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20633 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20636 gcc_unreachable ();
20639 if (GET_MODE_SIZE (imode
) == 32)
20641 tmp
= gen_reg_rtx (halfmode
);
20642 emit_insn (extract (tmp
, src
));
20646 /* Shift higher 8 bytes to lower 8 bytes. */
20647 tmp
= gen_reg_rtx (imode
);
20648 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20649 gen_lowpart (V1TImode
, src
),
20655 emit_insn (unpack (dest
, tmp
));
20659 rtx (*unpack
)(rtx
, rtx
, rtx
);
20665 unpack
= gen_vec_interleave_highv16qi
;
20667 unpack
= gen_vec_interleave_lowv16qi
;
20671 unpack
= gen_vec_interleave_highv8hi
;
20673 unpack
= gen_vec_interleave_lowv8hi
;
20677 unpack
= gen_vec_interleave_highv4si
;
20679 unpack
= gen_vec_interleave_lowv4si
;
20682 gcc_unreachable ();
20686 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20688 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20689 src
, pc_rtx
, pc_rtx
);
20691 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20695 /* Expand conditional increment or decrement using adb/sbb instructions.
20696 The default case using setcc followed by the conditional move can be
20697 done by generic code. */
20699 ix86_expand_int_addcc (rtx operands
[])
20701 enum rtx_code code
= GET_CODE (operands
[1]);
20703 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20705 rtx val
= const0_rtx
;
20706 bool fpcmp
= false;
20707 enum machine_mode mode
;
20708 rtx op0
= XEXP (operands
[1], 0);
20709 rtx op1
= XEXP (operands
[1], 1);
20711 if (operands
[3] != const1_rtx
20712 && operands
[3] != constm1_rtx
)
20714 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20716 code
= GET_CODE (compare_op
);
20718 flags
= XEXP (compare_op
, 0);
20720 if (GET_MODE (flags
) == CCFPmode
20721 || GET_MODE (flags
) == CCFPUmode
)
20724 code
= ix86_fp_compare_code_to_integer (code
);
20731 PUT_CODE (compare_op
,
20732 reverse_condition_maybe_unordered
20733 (GET_CODE (compare_op
)));
20735 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20738 mode
= GET_MODE (operands
[0]);
20740 /* Construct either adc or sbb insn. */
20741 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20746 insn
= gen_subqi3_carry
;
20749 insn
= gen_subhi3_carry
;
20752 insn
= gen_subsi3_carry
;
20755 insn
= gen_subdi3_carry
;
20758 gcc_unreachable ();
20766 insn
= gen_addqi3_carry
;
20769 insn
= gen_addhi3_carry
;
20772 insn
= gen_addsi3_carry
;
20775 insn
= gen_adddi3_carry
;
20778 gcc_unreachable ();
20781 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20787 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20788 but works for floating pointer parameters and nonoffsetable memories.
20789 For pushes, it returns just stack offsets; the values will be saved
20790 in the right order. Maximally three parts are generated. */
20793 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20798 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20800 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20802 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20803 gcc_assert (size
>= 2 && size
<= 4);
20805 /* Optimize constant pool reference to immediates. This is used by fp
20806 moves, that force all constants to memory to allow combining. */
20807 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20809 rtx tmp
= maybe_get_pool_constant (operand
);
20814 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20816 /* The only non-offsetable memories we handle are pushes. */
20817 int ok
= push_operand (operand
, VOIDmode
);
20821 operand
= copy_rtx (operand
);
20822 PUT_MODE (operand
, word_mode
);
20823 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20827 if (GET_CODE (operand
) == CONST_VECTOR
)
20829 enum machine_mode imode
= int_mode_for_mode (mode
);
20830 /* Caution: if we looked through a constant pool memory above,
20831 the operand may actually have a different mode now. That's
20832 ok, since we want to pun this all the way back to an integer. */
20833 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20834 gcc_assert (operand
!= NULL
);
20840 if (mode
== DImode
)
20841 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20846 if (REG_P (operand
))
20848 gcc_assert (reload_completed
);
20849 for (i
= 0; i
< size
; i
++)
20850 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20852 else if (offsettable_memref_p (operand
))
20854 operand
= adjust_address (operand
, SImode
, 0);
20855 parts
[0] = operand
;
20856 for (i
= 1; i
< size
; i
++)
20857 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20859 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20864 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20868 real_to_target (l
, &r
, mode
);
20869 parts
[3] = gen_int_mode (l
[3], SImode
);
20870 parts
[2] = gen_int_mode (l
[2], SImode
);
20873 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
20874 long double may not be 80-bit. */
20875 real_to_target (l
, &r
, mode
);
20876 parts
[2] = gen_int_mode (l
[2], SImode
);
20879 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20882 gcc_unreachable ();
20884 parts
[1] = gen_int_mode (l
[1], SImode
);
20885 parts
[0] = gen_int_mode (l
[0], SImode
);
20888 gcc_unreachable ();
20893 if (mode
== TImode
)
20894 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20895 if (mode
== XFmode
|| mode
== TFmode
)
20897 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20898 if (REG_P (operand
))
20900 gcc_assert (reload_completed
);
20901 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20902 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20904 else if (offsettable_memref_p (operand
))
20906 operand
= adjust_address (operand
, DImode
, 0);
20907 parts
[0] = operand
;
20908 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20910 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20915 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20916 real_to_target (l
, &r
, mode
);
20918 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20919 if (HOST_BITS_PER_WIDE_INT
>= 64)
20922 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20923 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20926 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20928 if (upper_mode
== SImode
)
20929 parts
[1] = gen_int_mode (l
[2], SImode
);
20930 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20933 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20934 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20937 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20940 gcc_unreachable ();
20947 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20948 Return false when normal moves are needed; true when all required
20949 insns have been emitted. Operands 2-4 contain the input values
20950 int the correct order; operands 5-7 contain the output values. */
20953 ix86_split_long_move (rtx operands
[])
20958 int collisions
= 0;
20959 enum machine_mode mode
= GET_MODE (operands
[0]);
20960 bool collisionparts
[4];
20962 /* The DFmode expanders may ask us to move double.
20963 For 64bit target this is single move. By hiding the fact
20964 here we simplify i386.md splitters. */
20965 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20967 /* Optimize constant pool reference to immediates. This is used by
20968 fp moves, that force all constants to memory to allow combining. */
20970 if (MEM_P (operands
[1])
20971 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20972 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20973 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20974 if (push_operand (operands
[0], VOIDmode
))
20976 operands
[0] = copy_rtx (operands
[0]);
20977 PUT_MODE (operands
[0], word_mode
);
20980 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20981 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20982 emit_move_insn (operands
[0], operands
[1]);
20986 /* The only non-offsettable memory we handle is push. */
20987 if (push_operand (operands
[0], VOIDmode
))
20990 gcc_assert (!MEM_P (operands
[0])
20991 || offsettable_memref_p (operands
[0]));
20993 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20994 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20996 /* When emitting push, take care for source operands on the stack. */
20997 if (push
&& MEM_P (operands
[1])
20998 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21000 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21002 /* Compensate for the stack decrement by 4. */
21003 if (!TARGET_64BIT
&& nparts
== 3
21004 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21005 src_base
= plus_constant (Pmode
, src_base
, 4);
21007 /* src_base refers to the stack pointer and is
21008 automatically decreased by emitted push. */
21009 for (i
= 0; i
< nparts
; i
++)
21010 part
[1][i
] = change_address (part
[1][i
],
21011 GET_MODE (part
[1][i
]), src_base
);
21014 /* We need to do copy in the right order in case an address register
21015 of the source overlaps the destination. */
21016 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21020 for (i
= 0; i
< nparts
; i
++)
21023 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21024 if (collisionparts
[i
])
21028 /* Collision in the middle part can be handled by reordering. */
21029 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21031 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21032 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21034 else if (collisions
== 1
21036 && (collisionparts
[1] || collisionparts
[2]))
21038 if (collisionparts
[1])
21040 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21041 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21045 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21046 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21050 /* If there are more collisions, we can't handle it by reordering.
21051 Do an lea to the last part and use only one colliding move. */
21052 else if (collisions
> 1)
21058 base
= part
[0][nparts
- 1];
21060 /* Handle the case when the last part isn't valid for lea.
21061 Happens in 64-bit mode storing the 12-byte XFmode. */
21062 if (GET_MODE (base
) != Pmode
)
21063 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21065 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21066 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21067 for (i
= 1; i
< nparts
; i
++)
21069 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21070 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21081 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21082 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21083 stack_pointer_rtx
, GEN_INT (-4)));
21084 emit_move_insn (part
[0][2], part
[1][2]);
21086 else if (nparts
== 4)
21088 emit_move_insn (part
[0][3], part
[1][3]);
21089 emit_move_insn (part
[0][2], part
[1][2]);
21094 /* In 64bit mode we don't have 32bit push available. In case this is
21095 register, it is OK - we will just use larger counterpart. We also
21096 retype memory - these comes from attempt to avoid REX prefix on
21097 moving of second half of TFmode value. */
21098 if (GET_MODE (part
[1][1]) == SImode
)
21100 switch (GET_CODE (part
[1][1]))
21103 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21107 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21111 gcc_unreachable ();
21114 if (GET_MODE (part
[1][0]) == SImode
)
21115 part
[1][0] = part
[1][1];
21118 emit_move_insn (part
[0][1], part
[1][1]);
21119 emit_move_insn (part
[0][0], part
[1][0]);
21123 /* Choose correct order to not overwrite the source before it is copied. */
21124 if ((REG_P (part
[0][0])
21125 && REG_P (part
[1][1])
21126 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21128 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21130 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21132 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21134 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21136 operands
[2 + i
] = part
[0][j
];
21137 operands
[6 + i
] = part
[1][j
];
21142 for (i
= 0; i
< nparts
; i
++)
21144 operands
[2 + i
] = part
[0][i
];
21145 operands
[6 + i
] = part
[1][i
];
21149 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21150 if (optimize_insn_for_size_p ())
21152 for (j
= 0; j
< nparts
- 1; j
++)
21153 if (CONST_INT_P (operands
[6 + j
])
21154 && operands
[6 + j
] != const0_rtx
21155 && REG_P (operands
[2 + j
]))
21156 for (i
= j
; i
< nparts
- 1; i
++)
21157 if (CONST_INT_P (operands
[7 + i
])
21158 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21159 operands
[7 + i
] = operands
[2 + j
];
21162 for (i
= 0; i
< nparts
; i
++)
21163 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21168 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21169 left shift by a constant, either using a single shift or
21170 a sequence of add instructions. */
21173 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21175 rtx (*insn
)(rtx
, rtx
, rtx
);
21178 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21179 && !optimize_insn_for_size_p ()))
21181 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21182 while (count
-- > 0)
21183 emit_insn (insn (operand
, operand
, operand
));
21187 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21188 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21193 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21195 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21196 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21197 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21199 rtx low
[2], high
[2];
21202 if (CONST_INT_P (operands
[2]))
21204 split_double_mode (mode
, operands
, 2, low
, high
);
21205 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21207 if (count
>= half_width
)
21209 emit_move_insn (high
[0], low
[1]);
21210 emit_move_insn (low
[0], const0_rtx
);
21212 if (count
> half_width
)
21213 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21217 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21219 if (!rtx_equal_p (operands
[0], operands
[1]))
21220 emit_move_insn (operands
[0], operands
[1]);
21222 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21223 ix86_expand_ashl_const (low
[0], count
, mode
);
21228 split_double_mode (mode
, operands
, 1, low
, high
);
21230 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21232 if (operands
[1] == const1_rtx
)
21234 /* Assuming we've chosen a QImode capable registers, then 1 << N
21235 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21236 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21238 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21240 ix86_expand_clear (low
[0]);
21241 ix86_expand_clear (high
[0]);
21242 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21244 d
= gen_lowpart (QImode
, low
[0]);
21245 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21246 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21247 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21249 d
= gen_lowpart (QImode
, high
[0]);
21250 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21251 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21252 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21255 /* Otherwise, we can get the same results by manually performing
21256 a bit extract operation on bit 5/6, and then performing the two
21257 shifts. The two methods of getting 0/1 into low/high are exactly
21258 the same size. Avoiding the shift in the bit extract case helps
21259 pentium4 a bit; no one else seems to care much either way. */
21262 enum machine_mode half_mode
;
21263 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21264 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21265 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21266 HOST_WIDE_INT bits
;
21269 if (mode
== DImode
)
21271 half_mode
= SImode
;
21272 gen_lshr3
= gen_lshrsi3
;
21273 gen_and3
= gen_andsi3
;
21274 gen_xor3
= gen_xorsi3
;
21279 half_mode
= DImode
;
21280 gen_lshr3
= gen_lshrdi3
;
21281 gen_and3
= gen_anddi3
;
21282 gen_xor3
= gen_xordi3
;
21286 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21287 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21289 x
= gen_lowpart (half_mode
, operands
[2]);
21290 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21292 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21293 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21294 emit_move_insn (low
[0], high
[0]);
21295 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21298 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21299 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21303 if (operands
[1] == constm1_rtx
)
21305 /* For -1 << N, we can avoid the shld instruction, because we
21306 know that we're shifting 0...31/63 ones into a -1. */
21307 emit_move_insn (low
[0], constm1_rtx
);
21308 if (optimize_insn_for_size_p ())
21309 emit_move_insn (high
[0], low
[0]);
21311 emit_move_insn (high
[0], constm1_rtx
);
21315 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21317 if (!rtx_equal_p (operands
[0], operands
[1]))
21318 emit_move_insn (operands
[0], operands
[1]);
21320 split_double_mode (mode
, operands
, 1, low
, high
);
21321 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21324 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21326 if (TARGET_CMOVE
&& scratch
)
21328 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21329 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21331 ix86_expand_clear (scratch
);
21332 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21336 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21337 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21339 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21344 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21346 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21347 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21348 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21349 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21351 rtx low
[2], high
[2];
21354 if (CONST_INT_P (operands
[2]))
21356 split_double_mode (mode
, operands
, 2, low
, high
);
21357 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21359 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21361 emit_move_insn (high
[0], high
[1]);
21362 emit_insn (gen_ashr3 (high
[0], high
[0],
21363 GEN_INT (half_width
- 1)));
21364 emit_move_insn (low
[0], high
[0]);
21367 else if (count
>= half_width
)
21369 emit_move_insn (low
[0], high
[1]);
21370 emit_move_insn (high
[0], low
[0]);
21371 emit_insn (gen_ashr3 (high
[0], high
[0],
21372 GEN_INT (half_width
- 1)));
21374 if (count
> half_width
)
21375 emit_insn (gen_ashr3 (low
[0], low
[0],
21376 GEN_INT (count
- half_width
)));
21380 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21382 if (!rtx_equal_p (operands
[0], operands
[1]))
21383 emit_move_insn (operands
[0], operands
[1]);
21385 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21386 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21391 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21393 if (!rtx_equal_p (operands
[0], operands
[1]))
21394 emit_move_insn (operands
[0], operands
[1]);
21396 split_double_mode (mode
, operands
, 1, low
, high
);
21398 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21399 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21401 if (TARGET_CMOVE
&& scratch
)
21403 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21404 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21406 emit_move_insn (scratch
, high
[0]);
21407 emit_insn (gen_ashr3 (scratch
, scratch
,
21408 GEN_INT (half_width
- 1)));
21409 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21414 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21415 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21417 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21423 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21425 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21426 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21427 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21428 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21430 rtx low
[2], high
[2];
21433 if (CONST_INT_P (operands
[2]))
21435 split_double_mode (mode
, operands
, 2, low
, high
);
21436 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21438 if (count
>= half_width
)
21440 emit_move_insn (low
[0], high
[1]);
21441 ix86_expand_clear (high
[0]);
21443 if (count
> half_width
)
21444 emit_insn (gen_lshr3 (low
[0], low
[0],
21445 GEN_INT (count
- half_width
)));
21449 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21451 if (!rtx_equal_p (operands
[0], operands
[1]))
21452 emit_move_insn (operands
[0], operands
[1]);
21454 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21455 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21460 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21462 if (!rtx_equal_p (operands
[0], operands
[1]))
21463 emit_move_insn (operands
[0], operands
[1]);
21465 split_double_mode (mode
, operands
, 1, low
, high
);
21467 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21468 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21470 if (TARGET_CMOVE
&& scratch
)
21472 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21473 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21475 ix86_expand_clear (scratch
);
21476 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21481 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21482 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21484 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21489 /* Predict just emitted jump instruction to be taken with probability PROB. */
21491 predict_jump (int prob
)
21493 rtx insn
= get_last_insn ();
21494 gcc_assert (JUMP_P (insn
));
21495 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21498 /* Helper function for the string operations below. Dest VARIABLE whether
21499 it is aligned to VALUE bytes. If true, jump to the label. */
21501 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21503 rtx label
= gen_label_rtx ();
21504 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21505 if (GET_MODE (variable
) == DImode
)
21506 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21508 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21509 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21512 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21514 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21518 /* Adjust COUNTER by the VALUE. */
21520 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21522 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21523 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21525 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21528 /* Zero extend possibly SImode EXP to Pmode register. */
21530 ix86_zero_extend_to_Pmode (rtx exp
)
21532 if (GET_MODE (exp
) != Pmode
)
21533 exp
= convert_to_mode (Pmode
, exp
, 1);
21534 return force_reg (Pmode
, exp
);
21537 /* Divide COUNTREG by SCALE. */
21539 scale_counter (rtx countreg
, int scale
)
21545 if (CONST_INT_P (countreg
))
21546 return GEN_INT (INTVAL (countreg
) / scale
);
21547 gcc_assert (REG_P (countreg
));
21549 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21550 GEN_INT (exact_log2 (scale
)),
21551 NULL
, 1, OPTAB_DIRECT
);
21555 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21556 DImode for constant loop counts. */
21558 static enum machine_mode
21559 counter_mode (rtx count_exp
)
21561 if (GET_MODE (count_exp
) != VOIDmode
)
21562 return GET_MODE (count_exp
);
21563 if (!CONST_INT_P (count_exp
))
21565 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21570 /* When SRCPTR is non-NULL, output simple loop to move memory
21571 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21572 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21573 equivalent loop to set memory by VALUE (supposed to be in MODE).
21575 The size is rounded down to whole number of chunk size moved at once.
21576 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21580 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21581 rtx destptr
, rtx srcptr
, rtx value
,
21582 rtx count
, enum machine_mode mode
, int unroll
,
21585 rtx out_label
, top_label
, iter
, tmp
;
21586 enum machine_mode iter_mode
= counter_mode (count
);
21587 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21588 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21594 top_label
= gen_label_rtx ();
21595 out_label
= gen_label_rtx ();
21596 iter
= gen_reg_rtx (iter_mode
);
21598 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21599 NULL
, 1, OPTAB_DIRECT
);
21600 /* Those two should combine. */
21601 if (piece_size
== const1_rtx
)
21603 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21605 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21607 emit_move_insn (iter
, const0_rtx
);
21609 emit_label (top_label
);
21611 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21612 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21613 destmem
= change_address (destmem
, mode
, x_addr
);
21617 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21618 srcmem
= change_address (srcmem
, mode
, y_addr
);
21620 /* When unrolling for chips that reorder memory reads and writes,
21621 we can save registers by using single temporary.
21622 Also using 4 temporaries is overkill in 32bit mode. */
21623 if (!TARGET_64BIT
&& 0)
21625 for (i
= 0; i
< unroll
; i
++)
21630 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21632 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21634 emit_move_insn (destmem
, srcmem
);
21640 gcc_assert (unroll
<= 4);
21641 for (i
= 0; i
< unroll
; i
++)
21643 tmpreg
[i
] = gen_reg_rtx (mode
);
21647 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21649 emit_move_insn (tmpreg
[i
], srcmem
);
21651 for (i
= 0; i
< unroll
; i
++)
21656 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21658 emit_move_insn (destmem
, tmpreg
[i
]);
21663 for (i
= 0; i
< unroll
; i
++)
21667 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21668 emit_move_insn (destmem
, value
);
21671 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21672 true, OPTAB_LIB_WIDEN
);
21674 emit_move_insn (iter
, tmp
);
21676 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21678 if (expected_size
!= -1)
21680 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21681 if (expected_size
== 0)
21683 else if (expected_size
> REG_BR_PROB_BASE
)
21684 predict_jump (REG_BR_PROB_BASE
- 1);
21686 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21689 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21690 iter
= ix86_zero_extend_to_Pmode (iter
);
21691 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21692 true, OPTAB_LIB_WIDEN
);
21693 if (tmp
!= destptr
)
21694 emit_move_insn (destptr
, tmp
);
21697 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21698 true, OPTAB_LIB_WIDEN
);
21700 emit_move_insn (srcptr
, tmp
);
21702 emit_label (out_label
);
21705 /* Output "rep; mov" instruction.
21706 Arguments have same meaning as for previous function */
21708 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21709 rtx destptr
, rtx srcptr
,
21711 enum machine_mode mode
)
21716 HOST_WIDE_INT rounded_count
;
21718 /* If the size is known, it is shorter to use rep movs. */
21719 if (mode
== QImode
&& CONST_INT_P (count
)
21720 && !(INTVAL (count
) & 3))
21723 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21724 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21725 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21726 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21727 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21728 if (mode
!= QImode
)
21730 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21731 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21732 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21733 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21734 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21735 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21739 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21740 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21742 if (CONST_INT_P (count
))
21744 rounded_count
= (INTVAL (count
)
21745 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21746 destmem
= shallow_copy_rtx (destmem
);
21747 srcmem
= shallow_copy_rtx (srcmem
);
21748 set_mem_size (destmem
, rounded_count
);
21749 set_mem_size (srcmem
, rounded_count
);
21753 if (MEM_SIZE_KNOWN_P (destmem
))
21754 clear_mem_size (destmem
);
21755 if (MEM_SIZE_KNOWN_P (srcmem
))
21756 clear_mem_size (srcmem
);
21758 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21762 /* Output "rep; stos" instruction.
21763 Arguments have same meaning as for previous function */
21765 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21766 rtx count
, enum machine_mode mode
,
21771 HOST_WIDE_INT rounded_count
;
21773 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21774 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21775 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21776 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21777 if (mode
!= QImode
)
21779 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21780 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21781 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21784 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21785 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21787 rounded_count
= (INTVAL (count
)
21788 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21789 destmem
= shallow_copy_rtx (destmem
);
21790 set_mem_size (destmem
, rounded_count
);
21792 else if (MEM_SIZE_KNOWN_P (destmem
))
21793 clear_mem_size (destmem
);
21794 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21798 emit_strmov (rtx destmem
, rtx srcmem
,
21799 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21801 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21802 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21803 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21806 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21808 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21809 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21812 if (CONST_INT_P (count
))
21814 HOST_WIDE_INT countval
= INTVAL (count
);
21817 if ((countval
& 0x10) && max_size
> 16)
21821 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21822 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21825 gcc_unreachable ();
21828 if ((countval
& 0x08) && max_size
> 8)
21831 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21834 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21835 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21839 if ((countval
& 0x04) && max_size
> 4)
21841 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21844 if ((countval
& 0x02) && max_size
> 2)
21846 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21849 if ((countval
& 0x01) && max_size
> 1)
21851 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21858 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21859 count
, 1, OPTAB_DIRECT
);
21860 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21861 count
, QImode
, 1, 4);
21865 /* When there are stringops, we can cheaply increase dest and src pointers.
21866 Otherwise we save code size by maintaining offset (zero is readily
21867 available from preceding rep operation) and using x86 addressing modes.
21869 if (TARGET_SINGLE_STRINGOP
)
21873 rtx label
= ix86_expand_aligntest (count
, 4, true);
21874 src
= change_address (srcmem
, SImode
, srcptr
);
21875 dest
= change_address (destmem
, SImode
, destptr
);
21876 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21877 emit_label (label
);
21878 LABEL_NUSES (label
) = 1;
21882 rtx label
= ix86_expand_aligntest (count
, 2, true);
21883 src
= change_address (srcmem
, HImode
, srcptr
);
21884 dest
= change_address (destmem
, HImode
, destptr
);
21885 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21886 emit_label (label
);
21887 LABEL_NUSES (label
) = 1;
21891 rtx label
= ix86_expand_aligntest (count
, 1, true);
21892 src
= change_address (srcmem
, QImode
, srcptr
);
21893 dest
= change_address (destmem
, QImode
, destptr
);
21894 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21895 emit_label (label
);
21896 LABEL_NUSES (label
) = 1;
21901 rtx offset
= force_reg (Pmode
, const0_rtx
);
21906 rtx label
= ix86_expand_aligntest (count
, 4, true);
21907 src
= change_address (srcmem
, SImode
, srcptr
);
21908 dest
= change_address (destmem
, SImode
, destptr
);
21909 emit_move_insn (dest
, src
);
21910 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21911 true, OPTAB_LIB_WIDEN
);
21913 emit_move_insn (offset
, tmp
);
21914 emit_label (label
);
21915 LABEL_NUSES (label
) = 1;
21919 rtx label
= ix86_expand_aligntest (count
, 2, true);
21920 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21921 src
= change_address (srcmem
, HImode
, tmp
);
21922 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21923 dest
= change_address (destmem
, HImode
, tmp
);
21924 emit_move_insn (dest
, src
);
21925 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
21926 true, OPTAB_LIB_WIDEN
);
21928 emit_move_insn (offset
, tmp
);
21929 emit_label (label
);
21930 LABEL_NUSES (label
) = 1;
21934 rtx label
= ix86_expand_aligntest (count
, 1, true);
21935 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21936 src
= change_address (srcmem
, QImode
, tmp
);
21937 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21938 dest
= change_address (destmem
, QImode
, tmp
);
21939 emit_move_insn (dest
, src
);
21940 emit_label (label
);
21941 LABEL_NUSES (label
) = 1;
21946 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21948 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21949 rtx count
, int max_size
)
21952 expand_simple_binop (counter_mode (count
), AND
, count
,
21953 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21954 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21955 gen_lowpart (QImode
, value
), count
, QImode
,
21959 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21961 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
21965 if (CONST_INT_P (count
))
21967 HOST_WIDE_INT countval
= INTVAL (count
);
21970 if ((countval
& 0x10) && max_size
> 16)
21974 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21975 emit_insn (gen_strset (destptr
, dest
, value
));
21976 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
21977 emit_insn (gen_strset (destptr
, dest
, value
));
21980 gcc_unreachable ();
21983 if ((countval
& 0x08) && max_size
> 8)
21987 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21988 emit_insn (gen_strset (destptr
, dest
, value
));
21992 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21993 emit_insn (gen_strset (destptr
, dest
, value
));
21994 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
21995 emit_insn (gen_strset (destptr
, dest
, value
));
21999 if ((countval
& 0x04) && max_size
> 4)
22001 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22002 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22005 if ((countval
& 0x02) && max_size
> 2)
22007 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22008 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22011 if ((countval
& 0x01) && max_size
> 1)
22013 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22014 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22021 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22026 rtx label
= ix86_expand_aligntest (count
, 16, true);
22029 dest
= change_address (destmem
, DImode
, destptr
);
22030 emit_insn (gen_strset (destptr
, dest
, value
));
22031 emit_insn (gen_strset (destptr
, dest
, value
));
22035 dest
= change_address (destmem
, SImode
, destptr
);
22036 emit_insn (gen_strset (destptr
, dest
, value
));
22037 emit_insn (gen_strset (destptr
, dest
, value
));
22038 emit_insn (gen_strset (destptr
, dest
, value
));
22039 emit_insn (gen_strset (destptr
, dest
, value
));
22041 emit_label (label
);
22042 LABEL_NUSES (label
) = 1;
22046 rtx label
= ix86_expand_aligntest (count
, 8, true);
22049 dest
= change_address (destmem
, DImode
, destptr
);
22050 emit_insn (gen_strset (destptr
, dest
, value
));
22054 dest
= change_address (destmem
, SImode
, destptr
);
22055 emit_insn (gen_strset (destptr
, dest
, value
));
22056 emit_insn (gen_strset (destptr
, dest
, value
));
22058 emit_label (label
);
22059 LABEL_NUSES (label
) = 1;
22063 rtx label
= ix86_expand_aligntest (count
, 4, true);
22064 dest
= change_address (destmem
, SImode
, destptr
);
22065 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22066 emit_label (label
);
22067 LABEL_NUSES (label
) = 1;
22071 rtx label
= ix86_expand_aligntest (count
, 2, true);
22072 dest
= change_address (destmem
, HImode
, destptr
);
22073 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22074 emit_label (label
);
22075 LABEL_NUSES (label
) = 1;
22079 rtx label
= ix86_expand_aligntest (count
, 1, true);
22080 dest
= change_address (destmem
, QImode
, destptr
);
22081 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22082 emit_label (label
);
22083 LABEL_NUSES (label
) = 1;
22087 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22088 DESIRED_ALIGNMENT. */
22090 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22091 rtx destptr
, rtx srcptr
, rtx count
,
22092 int align
, int desired_alignment
)
22094 if (align
<= 1 && desired_alignment
> 1)
22096 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22097 srcmem
= change_address (srcmem
, QImode
, srcptr
);
22098 destmem
= change_address (destmem
, QImode
, destptr
);
22099 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22100 ix86_adjust_counter (count
, 1);
22101 emit_label (label
);
22102 LABEL_NUSES (label
) = 1;
22104 if (align
<= 2 && desired_alignment
> 2)
22106 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22107 srcmem
= change_address (srcmem
, HImode
, srcptr
);
22108 destmem
= change_address (destmem
, HImode
, destptr
);
22109 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22110 ix86_adjust_counter (count
, 2);
22111 emit_label (label
);
22112 LABEL_NUSES (label
) = 1;
22114 if (align
<= 4 && desired_alignment
> 4)
22116 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22117 srcmem
= change_address (srcmem
, SImode
, srcptr
);
22118 destmem
= change_address (destmem
, SImode
, destptr
);
22119 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22120 ix86_adjust_counter (count
, 4);
22121 emit_label (label
);
22122 LABEL_NUSES (label
) = 1;
22124 gcc_assert (desired_alignment
<= 8);
22127 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22128 ALIGN_BYTES is how many bytes need to be copied. */
22130 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22131 int desired_align
, int align_bytes
)
22134 rtx orig_dst
= dst
;
22135 rtx orig_src
= src
;
22137 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22138 if (src_align_bytes
>= 0)
22139 src_align_bytes
= desired_align
- src_align_bytes
;
22140 if (align_bytes
& 1)
22142 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22143 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22145 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22147 if (align_bytes
& 2)
22149 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22150 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22151 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22152 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22153 if (src_align_bytes
>= 0
22154 && (src_align_bytes
& 1) == (align_bytes
& 1)
22155 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22156 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22158 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22160 if (align_bytes
& 4)
22162 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22163 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22164 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22165 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22166 if (src_align_bytes
>= 0)
22168 unsigned int src_align
= 0;
22169 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22171 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22173 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22174 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22177 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22179 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22180 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22181 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22182 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22183 if (src_align_bytes
>= 0)
22185 unsigned int src_align
= 0;
22186 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22188 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22190 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22192 if (src_align
> (unsigned int) desired_align
)
22193 src_align
= desired_align
;
22194 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22195 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22197 if (MEM_SIZE_KNOWN_P (orig_dst
))
22198 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22199 if (MEM_SIZE_KNOWN_P (orig_src
))
22200 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22205 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22206 DESIRED_ALIGNMENT. */
22208 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22209 int align
, int desired_alignment
)
22211 if (align
<= 1 && desired_alignment
> 1)
22213 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22214 destmem
= change_address (destmem
, QImode
, destptr
);
22215 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22216 ix86_adjust_counter (count
, 1);
22217 emit_label (label
);
22218 LABEL_NUSES (label
) = 1;
22220 if (align
<= 2 && desired_alignment
> 2)
22222 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22223 destmem
= change_address (destmem
, HImode
, destptr
);
22224 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22225 ix86_adjust_counter (count
, 2);
22226 emit_label (label
);
22227 LABEL_NUSES (label
) = 1;
22229 if (align
<= 4 && desired_alignment
> 4)
22231 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22232 destmem
= change_address (destmem
, SImode
, destptr
);
22233 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22234 ix86_adjust_counter (count
, 4);
22235 emit_label (label
);
22236 LABEL_NUSES (label
) = 1;
22238 gcc_assert (desired_alignment
<= 8);
22241 /* Set enough from DST to align DST known to by aligned by ALIGN to
22242 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22244 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22245 int desired_align
, int align_bytes
)
22248 rtx orig_dst
= dst
;
22249 if (align_bytes
& 1)
22251 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22253 emit_insn (gen_strset (destreg
, dst
,
22254 gen_lowpart (QImode
, value
)));
22256 if (align_bytes
& 2)
22258 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22259 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22260 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22262 emit_insn (gen_strset (destreg
, dst
,
22263 gen_lowpart (HImode
, value
)));
22265 if (align_bytes
& 4)
22267 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22268 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22269 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22271 emit_insn (gen_strset (destreg
, dst
,
22272 gen_lowpart (SImode
, value
)));
22274 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22275 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22276 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22277 if (MEM_SIZE_KNOWN_P (orig_dst
))
22278 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22282 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22283 static enum stringop_alg
22284 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22285 int *dynamic_check
)
22287 const struct stringop_algs
* algs
;
22288 bool optimize_for_speed
;
22289 /* Algorithms using the rep prefix want at least edi and ecx;
22290 additionally, memset wants eax and memcpy wants esi. Don't
22291 consider such algorithms if the user has appropriated those
22292 registers for their own purposes. */
22293 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22295 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22297 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22298 || (alg != rep_prefix_1_byte \
22299 && alg != rep_prefix_4_byte \
22300 && alg != rep_prefix_8_byte))
22301 const struct processor_costs
*cost
;
22303 /* Even if the string operation call is cold, we still might spend a lot
22304 of time processing large blocks. */
22305 if (optimize_function_for_size_p (cfun
)
22306 || (optimize_insn_for_size_p ()
22307 && expected_size
!= -1 && expected_size
< 256))
22308 optimize_for_speed
= false;
22310 optimize_for_speed
= true;
22312 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22314 *dynamic_check
= -1;
22316 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22318 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22319 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22320 return ix86_stringop_alg
;
22321 /* rep; movq or rep; movl is the smallest variant. */
22322 else if (!optimize_for_speed
)
22324 if (!count
|| (count
& 3))
22325 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22327 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22329 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22331 else if (expected_size
!= -1 && expected_size
< 4)
22332 return loop_1_byte
;
22333 else if (expected_size
!= -1)
22336 enum stringop_alg alg
= libcall
;
22337 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22339 /* We get here if the algorithms that were not libcall-based
22340 were rep-prefix based and we are unable to use rep prefixes
22341 based on global register usage. Break out of the loop and
22342 use the heuristic below. */
22343 if (algs
->size
[i
].max
== 0)
22345 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22347 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22349 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22351 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22352 last non-libcall inline algorithm. */
22353 if (TARGET_INLINE_ALL_STRINGOPS
)
22355 /* When the current size is best to be copied by a libcall,
22356 but we are still forced to inline, run the heuristic below
22357 that will pick code for medium sized blocks. */
22358 if (alg
!= libcall
)
22362 else if (ALG_USABLE_P (candidate
))
22366 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22368 /* When asked to inline the call anyway, try to pick meaningful choice.
22369 We look for maximal size of block that is faster to copy by hand and
22370 take blocks of at most of that size guessing that average size will
22371 be roughly half of the block.
22373 If this turns out to be bad, we might simply specify the preferred
22374 choice in ix86_costs. */
22375 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22376 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22379 enum stringop_alg alg
;
22381 bool any_alg_usable_p
= true;
22383 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22385 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22386 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22388 if (candidate
!= libcall
&& candidate
22389 && ALG_USABLE_P (candidate
))
22390 max
= algs
->size
[i
].max
;
22392 /* If there aren't any usable algorithms, then recursing on
22393 smaller sizes isn't going to find anything. Just return the
22394 simple byte-at-a-time copy loop. */
22395 if (!any_alg_usable_p
)
22397 /* Pick something reasonable. */
22398 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22399 *dynamic_check
= 128;
22400 return loop_1_byte
;
22404 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
22405 gcc_assert (*dynamic_check
== -1);
22406 gcc_assert (alg
!= libcall
);
22407 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22408 *dynamic_check
= max
;
22411 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22412 #undef ALG_USABLE_P
22415 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22416 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22418 decide_alignment (int align
,
22419 enum stringop_alg alg
,
22422 int desired_align
= 0;
22426 gcc_unreachable ();
22428 case unrolled_loop
:
22429 desired_align
= GET_MODE_SIZE (Pmode
);
22431 case rep_prefix_8_byte
:
22434 case rep_prefix_4_byte
:
22435 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22436 copying whole cacheline at once. */
22437 if (TARGET_PENTIUMPRO
)
22442 case rep_prefix_1_byte
:
22443 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22444 copying whole cacheline at once. */
22445 if (TARGET_PENTIUMPRO
)
22459 if (desired_align
< align
)
22460 desired_align
= align
;
22461 if (expected_size
!= -1 && expected_size
< 4)
22462 desired_align
= align
;
22463 return desired_align
;
22466 /* Return the smallest power of 2 greater than VAL. */
22468 smallest_pow2_greater_than (int val
)
22476 /* Expand string move (memcpy) operation. Use i386 string operations
22477 when profitable. expand_setmem contains similar code. The code
22478 depends upon architecture, block size and alignment, but always has
22479 the same overall structure:
22481 1) Prologue guard: Conditional that jumps up to epilogues for small
22482 blocks that can be handled by epilogue alone. This is faster
22483 but also needed for correctness, since prologue assume the block
22484 is larger than the desired alignment.
22486 Optional dynamic check for size and libcall for large
22487 blocks is emitted here too, with -minline-stringops-dynamically.
22489 2) Prologue: copy first few bytes in order to get destination
22490 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22491 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22492 copied. We emit either a jump tree on power of two sized
22493 blocks, or a byte loop.
22495 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22496 with specified algorithm.
22498 4) Epilogue: code copying tail of the block that is too small to be
22499 handled by main body (or up to size guarded by prologue guard). */
22502 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22503 rtx expected_align_exp
, rtx expected_size_exp
)
22509 rtx jump_around_label
= NULL
;
22510 HOST_WIDE_INT align
= 1;
22511 unsigned HOST_WIDE_INT count
= 0;
22512 HOST_WIDE_INT expected_size
= -1;
22513 int size_needed
= 0, epilogue_size_needed
;
22514 int desired_align
= 0, align_bytes
= 0;
22515 enum stringop_alg alg
;
22517 bool need_zero_guard
= false;
22519 if (CONST_INT_P (align_exp
))
22520 align
= INTVAL (align_exp
);
22521 /* i386 can do misaligned access on reasonably increased cost. */
22522 if (CONST_INT_P (expected_align_exp
)
22523 && INTVAL (expected_align_exp
) > align
)
22524 align
= INTVAL (expected_align_exp
);
22525 /* ALIGN is the minimum of destination and source alignment, but we care here
22526 just about destination alignment. */
22527 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22528 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22530 if (CONST_INT_P (count_exp
))
22531 count
= expected_size
= INTVAL (count_exp
);
22532 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22533 expected_size
= INTVAL (expected_size_exp
);
22535 /* Make sure we don't need to care about overflow later on. */
22536 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22539 /* Step 0: Decide on preferred algorithm, desired alignment and
22540 size of chunks to be copied by main loop. */
22542 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
22543 desired_align
= decide_alignment (align
, alg
, expected_size
);
22545 if (!TARGET_ALIGN_STRINGOPS
)
22546 align
= desired_align
;
22548 if (alg
== libcall
)
22550 gcc_assert (alg
!= no_stringop
);
22552 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22553 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22554 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22559 gcc_unreachable ();
22561 need_zero_guard
= true;
22562 size_needed
= GET_MODE_SIZE (word_mode
);
22564 case unrolled_loop
:
22565 need_zero_guard
= true;
22566 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22568 case rep_prefix_8_byte
:
22571 case rep_prefix_4_byte
:
22574 case rep_prefix_1_byte
:
22578 need_zero_guard
= true;
22583 epilogue_size_needed
= size_needed
;
22585 /* Step 1: Prologue guard. */
22587 /* Alignment code needs count to be in register. */
22588 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22590 if (INTVAL (count_exp
) > desired_align
22591 && INTVAL (count_exp
) > size_needed
)
22594 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22595 if (align_bytes
<= 0)
22598 align_bytes
= desired_align
- align_bytes
;
22600 if (align_bytes
== 0)
22601 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22603 gcc_assert (desired_align
>= 1 && align
>= 1);
22605 /* Ensure that alignment prologue won't copy past end of block. */
22606 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22608 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22609 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22610 Make sure it is power of 2. */
22611 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22615 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22617 /* If main algorithm works on QImode, no epilogue is needed.
22618 For small sizes just don't align anything. */
22619 if (size_needed
== 1)
22620 desired_align
= align
;
22627 label
= gen_label_rtx ();
22628 emit_cmp_and_jump_insns (count_exp
,
22629 GEN_INT (epilogue_size_needed
),
22630 LTU
, 0, counter_mode (count_exp
), 1, label
);
22631 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22632 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22634 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22638 /* Emit code to decide on runtime whether library call or inline should be
22640 if (dynamic_check
!= -1)
22642 if (CONST_INT_P (count_exp
))
22644 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22646 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22647 count_exp
= const0_rtx
;
22653 rtx hot_label
= gen_label_rtx ();
22654 jump_around_label
= gen_label_rtx ();
22655 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22656 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22657 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22658 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22659 emit_jump (jump_around_label
);
22660 emit_label (hot_label
);
22664 /* Step 2: Alignment prologue. */
22666 if (desired_align
> align
)
22668 if (align_bytes
== 0)
22670 /* Except for the first move in epilogue, we no longer know
22671 constant offset in aliasing info. It don't seems to worth
22672 the pain to maintain it for the first move, so throw away
22674 src
= change_address (src
, BLKmode
, srcreg
);
22675 dst
= change_address (dst
, BLKmode
, destreg
);
22676 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22681 /* If we know how many bytes need to be stored before dst is
22682 sufficiently aligned, maintain aliasing info accurately. */
22683 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22684 desired_align
, align_bytes
);
22685 count_exp
= plus_constant (counter_mode (count_exp
),
22686 count_exp
, -align_bytes
);
22687 count
-= align_bytes
;
22689 if (need_zero_guard
22690 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22691 || (align_bytes
== 0
22692 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22693 + desired_align
- align
))))
22695 /* It is possible that we copied enough so the main loop will not
22697 gcc_assert (size_needed
> 1);
22698 if (label
== NULL_RTX
)
22699 label
= gen_label_rtx ();
22700 emit_cmp_and_jump_insns (count_exp
,
22701 GEN_INT (size_needed
),
22702 LTU
, 0, counter_mode (count_exp
), 1, label
);
22703 if (expected_size
== -1
22704 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22705 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22707 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22710 if (label
&& size_needed
== 1)
22712 emit_label (label
);
22713 LABEL_NUSES (label
) = 1;
22715 epilogue_size_needed
= 1;
22717 else if (label
== NULL_RTX
)
22718 epilogue_size_needed
= size_needed
;
22720 /* Step 3: Main loop. */
22726 gcc_unreachable ();
22728 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22729 count_exp
, QImode
, 1, expected_size
);
22732 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22733 count_exp
, word_mode
, 1, expected_size
);
22735 case unrolled_loop
:
22736 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22737 registers for 4 temporaries anyway. */
22738 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22739 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22742 case rep_prefix_8_byte
:
22743 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22746 case rep_prefix_4_byte
:
22747 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22750 case rep_prefix_1_byte
:
22751 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22755 /* Adjust properly the offset of src and dest memory for aliasing. */
22756 if (CONST_INT_P (count_exp
))
22758 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22759 (count
/ size_needed
) * size_needed
);
22760 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22761 (count
/ size_needed
) * size_needed
);
22765 src
= change_address (src
, BLKmode
, srcreg
);
22766 dst
= change_address (dst
, BLKmode
, destreg
);
22769 /* Step 4: Epilogue to copy the remaining bytes. */
22773 /* When the main loop is done, COUNT_EXP might hold original count,
22774 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22775 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22776 bytes. Compensate if needed. */
22778 if (size_needed
< epilogue_size_needed
)
22781 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22782 GEN_INT (size_needed
- 1), count_exp
, 1,
22784 if (tmp
!= count_exp
)
22785 emit_move_insn (count_exp
, tmp
);
22787 emit_label (label
);
22788 LABEL_NUSES (label
) = 1;
22791 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22792 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22793 epilogue_size_needed
);
22794 if (jump_around_label
)
22795 emit_label (jump_around_label
);
22799 /* Helper function for memcpy. For QImode value 0xXY produce
22800 0xXYXYXYXY of wide specified by MODE. This is essentially
22801 a * 0x10101010, but we can do slightly better than
22802 synth_mult by unwinding the sequence by hand on CPUs with
22805 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22807 enum machine_mode valmode
= GET_MODE (val
);
22809 int nops
= mode
== DImode
? 3 : 2;
22811 gcc_assert (mode
== SImode
|| mode
== DImode
);
22812 if (val
== const0_rtx
)
22813 return copy_to_mode_reg (mode
, const0_rtx
);
22814 if (CONST_INT_P (val
))
22816 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22820 if (mode
== DImode
)
22821 v
|= (v
<< 16) << 16;
22822 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22825 if (valmode
== VOIDmode
)
22827 if (valmode
!= QImode
)
22828 val
= gen_lowpart (QImode
, val
);
22829 if (mode
== QImode
)
22831 if (!TARGET_PARTIAL_REG_STALL
)
22833 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22834 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22835 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22836 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22838 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22839 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22840 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22845 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22847 if (!TARGET_PARTIAL_REG_STALL
)
22848 if (mode
== SImode
)
22849 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22851 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22854 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22855 NULL
, 1, OPTAB_DIRECT
);
22857 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22859 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22860 NULL
, 1, OPTAB_DIRECT
);
22861 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22862 if (mode
== SImode
)
22864 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22865 NULL
, 1, OPTAB_DIRECT
);
22866 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22871 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22872 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22873 alignment from ALIGN to DESIRED_ALIGN. */
22875 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22880 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22881 promoted_val
= promote_duplicated_reg (DImode
, val
);
22882 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22883 promoted_val
= promote_duplicated_reg (SImode
, val
);
22884 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22885 promoted_val
= promote_duplicated_reg (HImode
, val
);
22887 promoted_val
= val
;
22889 return promoted_val
;
22892 /* Expand string clear operation (bzero). Use i386 string operations when
22893 profitable. See expand_movmem comment for explanation of individual
22894 steps performed. */
22896 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22897 rtx expected_align_exp
, rtx expected_size_exp
)
22902 rtx jump_around_label
= NULL
;
22903 HOST_WIDE_INT align
= 1;
22904 unsigned HOST_WIDE_INT count
= 0;
22905 HOST_WIDE_INT expected_size
= -1;
22906 int size_needed
= 0, epilogue_size_needed
;
22907 int desired_align
= 0, align_bytes
= 0;
22908 enum stringop_alg alg
;
22909 rtx promoted_val
= NULL
;
22910 bool force_loopy_epilogue
= false;
22912 bool need_zero_guard
= false;
22914 if (CONST_INT_P (align_exp
))
22915 align
= INTVAL (align_exp
);
22916 /* i386 can do misaligned access on reasonably increased cost. */
22917 if (CONST_INT_P (expected_align_exp
)
22918 && INTVAL (expected_align_exp
) > align
)
22919 align
= INTVAL (expected_align_exp
);
22920 if (CONST_INT_P (count_exp
))
22921 count
= expected_size
= INTVAL (count_exp
);
22922 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22923 expected_size
= INTVAL (expected_size_exp
);
22925 /* Make sure we don't need to care about overflow later on. */
22926 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22929 /* Step 0: Decide on preferred algorithm, desired alignment and
22930 size of chunks to be copied by main loop. */
22932 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
22933 desired_align
= decide_alignment (align
, alg
, expected_size
);
22935 if (!TARGET_ALIGN_STRINGOPS
)
22936 align
= desired_align
;
22938 if (alg
== libcall
)
22940 gcc_assert (alg
!= no_stringop
);
22942 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22943 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22948 gcc_unreachable ();
22950 need_zero_guard
= true;
22951 size_needed
= GET_MODE_SIZE (word_mode
);
22953 case unrolled_loop
:
22954 need_zero_guard
= true;
22955 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
22957 case rep_prefix_8_byte
:
22960 case rep_prefix_4_byte
:
22963 case rep_prefix_1_byte
:
22967 need_zero_guard
= true;
22971 epilogue_size_needed
= size_needed
;
22973 /* Step 1: Prologue guard. */
22975 /* Alignment code needs count to be in register. */
22976 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22978 if (INTVAL (count_exp
) > desired_align
22979 && INTVAL (count_exp
) > size_needed
)
22982 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22983 if (align_bytes
<= 0)
22986 align_bytes
= desired_align
- align_bytes
;
22988 if (align_bytes
== 0)
22990 enum machine_mode mode
= SImode
;
22991 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22993 count_exp
= force_reg (mode
, count_exp
);
22996 /* Do the cheap promotion to allow better CSE across the
22997 main loop and epilogue (ie one load of the big constant in the
22998 front of all code. */
22999 if (CONST_INT_P (val_exp
))
23000 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23001 desired_align
, align
);
23002 /* Ensure that alignment prologue won't copy past end of block. */
23003 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23005 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23006 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23007 Make sure it is power of 2. */
23008 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23010 /* To improve performance of small blocks, we jump around the VAL
23011 promoting mode. This mean that if the promoted VAL is not constant,
23012 we might not use it in the epilogue and have to use byte
23014 if (epilogue_size_needed
> 2 && !promoted_val
)
23015 force_loopy_epilogue
= true;
23018 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23020 /* If main algorithm works on QImode, no epilogue is needed.
23021 For small sizes just don't align anything. */
23022 if (size_needed
== 1)
23023 desired_align
= align
;
23030 label
= gen_label_rtx ();
23031 emit_cmp_and_jump_insns (count_exp
,
23032 GEN_INT (epilogue_size_needed
),
23033 LTU
, 0, counter_mode (count_exp
), 1, label
);
23034 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23035 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23037 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23040 if (dynamic_check
!= -1)
23042 rtx hot_label
= gen_label_rtx ();
23043 jump_around_label
= gen_label_rtx ();
23044 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23045 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23046 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23047 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23048 emit_jump (jump_around_label
);
23049 emit_label (hot_label
);
23052 /* Step 2: Alignment prologue. */
23054 /* Do the expensive promotion once we branched off the small blocks. */
23056 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23057 desired_align
, align
);
23058 gcc_assert (desired_align
>= 1 && align
>= 1);
23060 if (desired_align
> align
)
23062 if (align_bytes
== 0)
23064 /* Except for the first move in epilogue, we no longer know
23065 constant offset in aliasing info. It don't seems to worth
23066 the pain to maintain it for the first move, so throw away
23068 dst
= change_address (dst
, BLKmode
, destreg
);
23069 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23074 /* If we know how many bytes need to be stored before dst is
23075 sufficiently aligned, maintain aliasing info accurately. */
23076 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23077 desired_align
, align_bytes
);
23078 count_exp
= plus_constant (counter_mode (count_exp
),
23079 count_exp
, -align_bytes
);
23080 count
-= align_bytes
;
23082 if (need_zero_guard
23083 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23084 || (align_bytes
== 0
23085 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23086 + desired_align
- align
))))
23088 /* It is possible that we copied enough so the main loop will not
23090 gcc_assert (size_needed
> 1);
23091 if (label
== NULL_RTX
)
23092 label
= gen_label_rtx ();
23093 emit_cmp_and_jump_insns (count_exp
,
23094 GEN_INT (size_needed
),
23095 LTU
, 0, counter_mode (count_exp
), 1, label
);
23096 if (expected_size
== -1
23097 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23098 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23100 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23103 if (label
&& size_needed
== 1)
23105 emit_label (label
);
23106 LABEL_NUSES (label
) = 1;
23108 promoted_val
= val_exp
;
23109 epilogue_size_needed
= 1;
23111 else if (label
== NULL_RTX
)
23112 epilogue_size_needed
= size_needed
;
23114 /* Step 3: Main loop. */
23120 gcc_unreachable ();
23122 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23123 count_exp
, QImode
, 1, expected_size
);
23126 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23127 count_exp
, word_mode
, 1, expected_size
);
23129 case unrolled_loop
:
23130 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23131 count_exp
, word_mode
, 4, expected_size
);
23133 case rep_prefix_8_byte
:
23134 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23137 case rep_prefix_4_byte
:
23138 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23141 case rep_prefix_1_byte
:
23142 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23146 /* Adjust properly the offset of src and dest memory for aliasing. */
23147 if (CONST_INT_P (count_exp
))
23148 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23149 (count
/ size_needed
) * size_needed
);
23151 dst
= change_address (dst
, BLKmode
, destreg
);
23153 /* Step 4: Epilogue to copy the remaining bytes. */
23157 /* When the main loop is done, COUNT_EXP might hold original count,
23158 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23159 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23160 bytes. Compensate if needed. */
23162 if (size_needed
< epilogue_size_needed
)
23165 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23166 GEN_INT (size_needed
- 1), count_exp
, 1,
23168 if (tmp
!= count_exp
)
23169 emit_move_insn (count_exp
, tmp
);
23171 emit_label (label
);
23172 LABEL_NUSES (label
) = 1;
23175 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23177 if (force_loopy_epilogue
)
23178 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23179 epilogue_size_needed
);
23181 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23182 epilogue_size_needed
);
23184 if (jump_around_label
)
23185 emit_label (jump_around_label
);
23189 /* Expand the appropriate insns for doing strlen if not just doing
23192 out = result, initialized with the start address
23193 align_rtx = alignment of the address.
23194 scratch = scratch register, initialized with the startaddress when
23195 not aligned, otherwise undefined
23197 This is just the body. It needs the initializations mentioned above and
23198 some address computing at the end. These things are done in i386.md. */
23201 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23205 rtx align_2_label
= NULL_RTX
;
23206 rtx align_3_label
= NULL_RTX
;
23207 rtx align_4_label
= gen_label_rtx ();
23208 rtx end_0_label
= gen_label_rtx ();
23210 rtx tmpreg
= gen_reg_rtx (SImode
);
23211 rtx scratch
= gen_reg_rtx (SImode
);
23215 if (CONST_INT_P (align_rtx
))
23216 align
= INTVAL (align_rtx
);
23218 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23220 /* Is there a known alignment and is it less than 4? */
23223 rtx scratch1
= gen_reg_rtx (Pmode
);
23224 emit_move_insn (scratch1
, out
);
23225 /* Is there a known alignment and is it not 2? */
23228 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23229 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23231 /* Leave just the 3 lower bits. */
23232 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23233 NULL_RTX
, 0, OPTAB_WIDEN
);
23235 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23236 Pmode
, 1, align_4_label
);
23237 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23238 Pmode
, 1, align_2_label
);
23239 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23240 Pmode
, 1, align_3_label
);
23244 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23245 check if is aligned to 4 - byte. */
23247 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23248 NULL_RTX
, 0, OPTAB_WIDEN
);
23250 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23251 Pmode
, 1, align_4_label
);
23254 mem
= change_address (src
, QImode
, out
);
23256 /* Now compare the bytes. */
23258 /* Compare the first n unaligned byte on a byte per byte basis. */
23259 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23260 QImode
, 1, end_0_label
);
23262 /* Increment the address. */
23263 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23265 /* Not needed with an alignment of 2 */
23268 emit_label (align_2_label
);
23270 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23273 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23275 emit_label (align_3_label
);
23278 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23281 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23284 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23285 align this loop. It gives only huge programs, but does not help to
23287 emit_label (align_4_label
);
23289 mem
= change_address (src
, SImode
, out
);
23290 emit_move_insn (scratch
, mem
);
23291 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23293 /* This formula yields a nonzero result iff one of the bytes is zero.
23294 This saves three branches inside loop and many cycles. */
23296 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23297 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23298 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23299 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23300 gen_int_mode (0x80808080, SImode
)));
23301 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23306 rtx reg
= gen_reg_rtx (SImode
);
23307 rtx reg2
= gen_reg_rtx (Pmode
);
23308 emit_move_insn (reg
, tmpreg
);
23309 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23311 /* If zero is not in the first two bytes, move two bytes forward. */
23312 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23313 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23314 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23315 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23316 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23319 /* Emit lea manually to avoid clobbering of flags. */
23320 emit_insn (gen_rtx_SET (SImode
, reg2
,
23321 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23323 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23324 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23325 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23326 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23332 rtx end_2_label
= gen_label_rtx ();
23333 /* Is zero in the first two bytes? */
23335 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23336 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23337 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23338 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23339 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23341 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23342 JUMP_LABEL (tmp
) = end_2_label
;
23344 /* Not in the first two. Move two bytes forward. */
23345 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23346 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23348 emit_label (end_2_label
);
23352 /* Avoid branch in fixing the byte. */
23353 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23354 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23355 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23356 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23357 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23359 emit_label (end_0_label
);
23362 /* Expand strlen. */
23365 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23367 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23369 /* The generic case of strlen expander is long. Avoid it's
23370 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23372 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23373 && !TARGET_INLINE_ALL_STRINGOPS
23374 && !optimize_insn_for_size_p ()
23375 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23378 addr
= force_reg (Pmode
, XEXP (src
, 0));
23379 scratch1
= gen_reg_rtx (Pmode
);
23381 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23382 && !optimize_insn_for_size_p ())
23384 /* Well it seems that some optimizer does not combine a call like
23385 foo(strlen(bar), strlen(bar));
23386 when the move and the subtraction is done here. It does calculate
23387 the length just once when these instructions are done inside of
23388 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23389 often used and I use one fewer register for the lifetime of
23390 output_strlen_unroll() this is better. */
23392 emit_move_insn (out
, addr
);
23394 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23396 /* strlensi_unroll_1 returns the address of the zero at the end of
23397 the string, like memchr(), so compute the length by subtracting
23398 the start address. */
23399 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23405 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23406 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23409 scratch2
= gen_reg_rtx (Pmode
);
23410 scratch3
= gen_reg_rtx (Pmode
);
23411 scratch4
= force_reg (Pmode
, constm1_rtx
);
23413 emit_move_insn (scratch3
, addr
);
23414 eoschar
= force_reg (QImode
, eoschar
);
23416 src
= replace_equiv_address_nv (src
, scratch3
);
23418 /* If .md starts supporting :P, this can be done in .md. */
23419 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23420 scratch4
), UNSPEC_SCAS
);
23421 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23422 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23423 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23428 /* For given symbol (function) construct code to compute address of it's PLT
23429 entry in large x86-64 PIC model. */
23431 construct_plt_address (rtx symbol
)
23435 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23436 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23437 gcc_assert (Pmode
== DImode
);
23439 tmp
= gen_reg_rtx (Pmode
);
23440 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23442 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23443 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23448 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23450 rtx pop
, bool sibcall
)
23452 /* We need to represent that SI and DI registers are clobbered
23454 static int clobbered_registers
[] = {
23455 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23456 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23457 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23458 XMM15_REG
, SI_REG
, DI_REG
23460 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23461 rtx use
= NULL
, call
;
23462 unsigned int vec_len
;
23464 if (pop
== const0_rtx
)
23466 gcc_assert (!TARGET_64BIT
|| !pop
);
23468 if (TARGET_MACHO
&& !TARGET_64BIT
)
23471 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23472 fnaddr
= machopic_indirect_call_target (fnaddr
);
23477 /* Static functions and indirect calls don't need the pic register. */
23478 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23479 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23480 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23481 use_reg (&use
, pic_offset_table_rtx
);
23484 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23486 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23487 emit_move_insn (al
, callarg2
);
23488 use_reg (&use
, al
);
23491 if (ix86_cmodel
== CM_LARGE_PIC
23493 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23494 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23495 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23497 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23498 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23500 fnaddr
= XEXP (fnaddr
, 0);
23501 if (GET_MODE (fnaddr
) != word_mode
)
23502 fnaddr
= convert_to_mode (word_mode
, fnaddr
, 1);
23503 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23507 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23509 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23510 vec
[vec_len
++] = call
;
23514 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23515 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23516 vec
[vec_len
++] = pop
;
23519 if (TARGET_64BIT_MS_ABI
23520 && (!callarg2
|| INTVAL (callarg2
) != -2))
23524 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23525 UNSPEC_MS_TO_SYSV_CALL
);
23527 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23529 = gen_rtx_CLOBBER (VOIDmode
,
23530 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23532 clobbered_registers
[i
]));
23536 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23537 call
= emit_call_insn (call
);
23539 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23544 /* Output the assembly for a call instruction. */
23547 ix86_output_call_insn (rtx insn
, rtx call_op
)
23549 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23550 bool seh_nop_p
= false;
23553 if (SIBLING_CALL_P (insn
))
23557 /* SEH epilogue detection requires the indirect branch case
23558 to include REX.W. */
23559 else if (TARGET_SEH
)
23560 xasm
= "rex.W jmp %A0";
23564 output_asm_insn (xasm
, &call_op
);
23568 /* SEH unwinding can require an extra nop to be emitted in several
23569 circumstances. Determine if we have one of those. */
23574 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23576 /* If we get to another real insn, we don't need the nop. */
23580 /* If we get to the epilogue note, prevent a catch region from
23581 being adjacent to the standard epilogue sequence. If non-
23582 call-exceptions, we'll have done this during epilogue emission. */
23583 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23584 && !flag_non_call_exceptions
23585 && !can_throw_internal (insn
))
23592 /* If we didn't find a real insn following the call, prevent the
23593 unwinder from looking into the next function. */
23599 xasm
= "call\t%P0";
23601 xasm
= "call\t%A0";
23603 output_asm_insn (xasm
, &call_op
);
23611 /* Clear stack slot assignments remembered from previous functions.
23612 This is called from INIT_EXPANDERS once before RTL is emitted for each
23615 static struct machine_function
*
23616 ix86_init_machine_status (void)
23618 struct machine_function
*f
;
23620 f
= ggc_alloc_cleared_machine_function ();
23621 f
->use_fast_prologue_epilogue_nregs
= -1;
23622 f
->call_abi
= ix86_abi
;
23627 /* Return a MEM corresponding to a stack slot with mode MODE.
23628 Allocate a new slot if necessary.
23630 The RTL for a function can have several slots available: N is
23631 which slot to use. */
23634 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23636 struct stack_local_entry
*s
;
23638 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23640 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23641 if (s
->mode
== mode
&& s
->n
== n
)
23642 return validize_mem (copy_rtx (s
->rtl
));
23644 s
= ggc_alloc_stack_local_entry ();
23647 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23649 s
->next
= ix86_stack_locals
;
23650 ix86_stack_locals
= s
;
23651 return validize_mem (s
->rtl
);
23655 ix86_instantiate_decls (void)
23657 struct stack_local_entry
*s
;
23659 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23660 if (s
->rtl
!= NULL_RTX
)
23661 instantiate_decl_rtl (s
->rtl
);
23664 /* Calculate the length of the memory address in the instruction encoding.
23665 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23666 or other prefixes. We never generate addr32 prefix for LEA insn. */
23669 memory_address_length (rtx addr
, bool lea
)
23671 struct ix86_address parts
;
23672 rtx base
, index
, disp
;
23676 if (GET_CODE (addr
) == PRE_DEC
23677 || GET_CODE (addr
) == POST_INC
23678 || GET_CODE (addr
) == PRE_MODIFY
23679 || GET_CODE (addr
) == POST_MODIFY
)
23682 ok
= ix86_decompose_address (addr
, &parts
);
23685 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23687 /* If this is not LEA instruction, add the length of addr32 prefix. */
23688 if (TARGET_64BIT
&& !lea
23689 && (SImode_address_operand (addr
, VOIDmode
)
23690 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23691 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23695 index
= parts
.index
;
23698 if (base
&& GET_CODE (base
) == SUBREG
)
23699 base
= SUBREG_REG (base
);
23700 if (index
&& GET_CODE (index
) == SUBREG
)
23701 index
= SUBREG_REG (index
);
23703 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23704 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23707 - esp as the base always wants an index,
23708 - ebp as the base always wants a displacement,
23709 - r12 as the base always wants an index,
23710 - r13 as the base always wants a displacement. */
23712 /* Register Indirect. */
23713 if (base
&& !index
&& !disp
)
23715 /* esp (for its index) and ebp (for its displacement) need
23716 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23718 if (base
== arg_pointer_rtx
23719 || base
== frame_pointer_rtx
23720 || REGNO (base
) == SP_REG
23721 || REGNO (base
) == BP_REG
23722 || REGNO (base
) == R12_REG
23723 || REGNO (base
) == R13_REG
)
23727 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23728 is not disp32, but disp32(%rip), so for disp32
23729 SIB byte is needed, unless print_operand_address
23730 optimizes it into disp32(%rip) or (%rip) is implied
23732 else if (disp
&& !base
&& !index
)
23739 if (GET_CODE (disp
) == CONST
)
23740 symbol
= XEXP (disp
, 0);
23741 if (GET_CODE (symbol
) == PLUS
23742 && CONST_INT_P (XEXP (symbol
, 1)))
23743 symbol
= XEXP (symbol
, 0);
23745 if (GET_CODE (symbol
) != LABEL_REF
23746 && (GET_CODE (symbol
) != SYMBOL_REF
23747 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23748 && (GET_CODE (symbol
) != UNSPEC
23749 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23750 && XINT (symbol
, 1) != UNSPEC_PCREL
23751 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23757 /* Find the length of the displacement constant. */
23760 if (base
&& satisfies_constraint_K (disp
))
23765 /* ebp always wants a displacement. Similarly r13. */
23766 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23769 /* An index requires the two-byte modrm form.... */
23771 /* ...like esp (or r12), which always wants an index. */
23772 || base
== arg_pointer_rtx
23773 || base
== frame_pointer_rtx
23774 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23781 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23782 is set, expect that insn have 8bit immediate alternative. */
23784 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23788 extract_insn_cached (insn
);
23789 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23790 if (CONSTANT_P (recog_data
.operand
[i
]))
23792 enum attr_mode mode
= get_attr_mode (insn
);
23795 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23797 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23804 ival
= trunc_int_for_mode (ival
, HImode
);
23807 ival
= trunc_int_for_mode (ival
, SImode
);
23812 if (IN_RANGE (ival
, -128, 127))
23829 /* Immediates for DImode instructions are encoded
23830 as 32bit sign extended values. */
23835 fatal_insn ("unknown insn mode", insn
);
23841 /* Compute default value for "length_address" attribute. */
23843 ix86_attr_length_address_default (rtx insn
)
23847 if (get_attr_type (insn
) == TYPE_LEA
)
23849 rtx set
= PATTERN (insn
), addr
;
23851 if (GET_CODE (set
) == PARALLEL
)
23852 set
= XVECEXP (set
, 0, 0);
23854 gcc_assert (GET_CODE (set
) == SET
);
23856 addr
= SET_SRC (set
);
23858 return memory_address_length (addr
, true);
23861 extract_insn_cached (insn
);
23862 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23863 if (MEM_P (recog_data
.operand
[i
]))
23865 constrain_operands_cached (reload_completed
);
23866 if (which_alternative
!= -1)
23868 const char *constraints
= recog_data
.constraints
[i
];
23869 int alt
= which_alternative
;
23871 while (*constraints
== '=' || *constraints
== '+')
23874 while (*constraints
++ != ',')
23876 /* Skip ignored operands. */
23877 if (*constraints
== 'X')
23880 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
23885 /* Compute default value for "length_vex" attribute. It includes
23886 2 or 3 byte VEX prefix and 1 opcode byte. */
23889 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
23893 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23894 byte VEX prefix. */
23895 if (!has_0f_opcode
|| has_vex_w
)
23898 /* We can always use 2 byte VEX prefix in 32bit. */
23902 extract_insn_cached (insn
);
23904 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23905 if (REG_P (recog_data
.operand
[i
]))
23907 /* REX.W bit uses 3 byte VEX prefix. */
23908 if (GET_MODE (recog_data
.operand
[i
]) == DImode
23909 && GENERAL_REG_P (recog_data
.operand
[i
]))
23914 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23915 if (MEM_P (recog_data
.operand
[i
])
23916 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
23923 /* Return the maximum number of instructions a cpu can issue. */
23926 ix86_issue_rate (void)
23930 case PROCESSOR_PENTIUM
:
23931 case PROCESSOR_ATOM
:
23933 case PROCESSOR_BTVER2
:
23936 case PROCESSOR_PENTIUMPRO
:
23937 case PROCESSOR_PENTIUM4
:
23938 case PROCESSOR_CORE2_32
:
23939 case PROCESSOR_CORE2_64
:
23940 case PROCESSOR_COREI7_32
:
23941 case PROCESSOR_COREI7_64
:
23942 case PROCESSOR_ATHLON
:
23944 case PROCESSOR_AMDFAM10
:
23945 case PROCESSOR_NOCONA
:
23946 case PROCESSOR_GENERIC32
:
23947 case PROCESSOR_GENERIC64
:
23948 case PROCESSOR_BDVER1
:
23949 case PROCESSOR_BDVER2
:
23950 case PROCESSOR_BDVER3
:
23951 case PROCESSOR_BTVER1
:
23959 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23960 by DEP_INSN and nothing set by DEP_INSN. */
23963 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
23967 /* Simplify the test for uninteresting insns. */
23968 if (insn_type
!= TYPE_SETCC
23969 && insn_type
!= TYPE_ICMOV
23970 && insn_type
!= TYPE_FCMOV
23971 && insn_type
!= TYPE_IBR
)
23974 if ((set
= single_set (dep_insn
)) != 0)
23976 set
= SET_DEST (set
);
23979 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
23980 && XVECLEN (PATTERN (dep_insn
), 0) == 2
23981 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
23982 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
23984 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23985 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23990 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
23993 /* This test is true if the dependent insn reads the flags but
23994 not any other potentially set register. */
23995 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
23998 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24004 /* Return true iff USE_INSN has a memory address with operands set by
24008 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24011 extract_insn_cached (use_insn
);
24012 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24013 if (MEM_P (recog_data
.operand
[i
]))
24015 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24016 return modified_in_p (addr
, set_insn
) != 0;
24022 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24024 enum attr_type insn_type
, dep_insn_type
;
24025 enum attr_memory memory
;
24027 int dep_insn_code_number
;
24029 /* Anti and output dependencies have zero cost on all CPUs. */
24030 if (REG_NOTE_KIND (link
) != 0)
24033 dep_insn_code_number
= recog_memoized (dep_insn
);
24035 /* If we can't recognize the insns, we can't really do anything. */
24036 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24039 insn_type
= get_attr_type (insn
);
24040 dep_insn_type
= get_attr_type (dep_insn
);
24044 case PROCESSOR_PENTIUM
:
24045 /* Address Generation Interlock adds a cycle of latency. */
24046 if (insn_type
== TYPE_LEA
)
24048 rtx addr
= PATTERN (insn
);
24050 if (GET_CODE (addr
) == PARALLEL
)
24051 addr
= XVECEXP (addr
, 0, 0);
24053 gcc_assert (GET_CODE (addr
) == SET
);
24055 addr
= SET_SRC (addr
);
24056 if (modified_in_p (addr
, dep_insn
))
24059 else if (ix86_agi_dependent (dep_insn
, insn
))
24062 /* ??? Compares pair with jump/setcc. */
24063 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24066 /* Floating point stores require value to be ready one cycle earlier. */
24067 if (insn_type
== TYPE_FMOV
24068 && get_attr_memory (insn
) == MEMORY_STORE
24069 && !ix86_agi_dependent (dep_insn
, insn
))
24073 case PROCESSOR_PENTIUMPRO
:
24074 memory
= get_attr_memory (insn
);
24076 /* INT->FP conversion is expensive. */
24077 if (get_attr_fp_int_src (dep_insn
))
24080 /* There is one cycle extra latency between an FP op and a store. */
24081 if (insn_type
== TYPE_FMOV
24082 && (set
= single_set (dep_insn
)) != NULL_RTX
24083 && (set2
= single_set (insn
)) != NULL_RTX
24084 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24085 && MEM_P (SET_DEST (set2
)))
24088 /* Show ability of reorder buffer to hide latency of load by executing
24089 in parallel with previous instruction in case
24090 previous instruction is not needed to compute the address. */
24091 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24092 && !ix86_agi_dependent (dep_insn
, insn
))
24094 /* Claim moves to take one cycle, as core can issue one load
24095 at time and the next load can start cycle later. */
24096 if (dep_insn_type
== TYPE_IMOV
24097 || dep_insn_type
== TYPE_FMOV
)
24105 memory
= get_attr_memory (insn
);
24107 /* The esp dependency is resolved before the instruction is really
24109 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24110 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24113 /* INT->FP conversion is expensive. */
24114 if (get_attr_fp_int_src (dep_insn
))
24117 /* Show ability of reorder buffer to hide latency of load by executing
24118 in parallel with previous instruction in case
24119 previous instruction is not needed to compute the address. */
24120 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24121 && !ix86_agi_dependent (dep_insn
, insn
))
24123 /* Claim moves to take one cycle, as core can issue one load
24124 at time and the next load can start cycle later. */
24125 if (dep_insn_type
== TYPE_IMOV
24126 || dep_insn_type
== TYPE_FMOV
)
24135 case PROCESSOR_ATHLON
:
24137 case PROCESSOR_AMDFAM10
:
24138 case PROCESSOR_BDVER1
:
24139 case PROCESSOR_BDVER2
:
24140 case PROCESSOR_BDVER3
:
24141 case PROCESSOR_BTVER1
:
24142 case PROCESSOR_BTVER2
:
24143 case PROCESSOR_ATOM
:
24144 case PROCESSOR_GENERIC32
:
24145 case PROCESSOR_GENERIC64
:
24146 memory
= get_attr_memory (insn
);
24148 /* Show ability of reorder buffer to hide latency of load by executing
24149 in parallel with previous instruction in case
24150 previous instruction is not needed to compute the address. */
24151 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24152 && !ix86_agi_dependent (dep_insn
, insn
))
24154 enum attr_unit unit
= get_attr_unit (insn
);
24157 /* Because of the difference between the length of integer and
24158 floating unit pipeline preparation stages, the memory operands
24159 for floating point are cheaper.
24161 ??? For Athlon it the difference is most probably 2. */
24162 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24165 loadcost
= TARGET_ATHLON
? 2 : 0;
24167 if (cost
>= loadcost
)
24180 /* How many alternative schedules to try. This should be as wide as the
24181 scheduling freedom in the DFA, but no wider. Making this value too
24182 large results extra work for the scheduler. */
24185 ia32_multipass_dfa_lookahead (void)
24189 case PROCESSOR_PENTIUM
:
24192 case PROCESSOR_PENTIUMPRO
:
24196 case PROCESSOR_CORE2_32
:
24197 case PROCESSOR_CORE2_64
:
24198 case PROCESSOR_COREI7_32
:
24199 case PROCESSOR_COREI7_64
:
24200 case PROCESSOR_ATOM
:
24201 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24202 as many instructions can be executed on a cycle, i.e.,
24203 issue_rate. I wonder why tuning for many CPUs does not do this. */
24204 if (reload_completed
)
24205 return ix86_issue_rate ();
24206 /* Don't use lookahead for pre-reload schedule to save compile time. */
24214 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24215 execution. It is applied if
24216 (1) IMUL instruction is on the top of list;
24217 (2) There exists the only producer of independent IMUL instruction in
24219 (3) Put found producer on the top of ready list.
24220 Returns issue rate. */
24223 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24224 int clock_var ATTRIBUTE_UNUSED
)
24226 static int issue_rate
= -1;
24227 int n_ready
= *pn_ready
;
24228 rtx insn
, insn1
, insn2
;
24230 sd_iterator_def sd_it
;
24234 /* Set up issue rate. */
24235 issue_rate
= ix86_issue_rate();
24237 /* Do reodering for Atom only. */
24238 if (ix86_tune
!= PROCESSOR_ATOM
)
24240 /* Do not perform ready list reodering for pre-reload schedule pass. */
24241 if (!reload_completed
)
24243 /* Nothing to do if ready list contains only 1 instruction. */
24247 /* Check that IMUL instruction is on the top of ready list. */
24248 insn
= ready
[n_ready
- 1];
24249 if (!NONDEBUG_INSN_P (insn
))
24251 insn
= PATTERN (insn
);
24252 if (GET_CODE (insn
) == PARALLEL
)
24253 insn
= XVECEXP (insn
, 0, 0);
24254 if (GET_CODE (insn
) != SET
)
24256 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24257 && GET_MODE (SET_SRC (insn
)) == SImode
))
24260 /* Search for producer of independent IMUL instruction. */
24261 for (i
= n_ready
- 2; i
>= 0; i
--)
24264 if (!NONDEBUG_INSN_P (insn
))
24266 /* Skip IMUL instruction. */
24267 insn2
= PATTERN (insn
);
24268 if (GET_CODE (insn2
) == PARALLEL
)
24269 insn2
= XVECEXP (insn2
, 0, 0);
24270 if (GET_CODE (insn2
) == SET
24271 && GET_CODE (SET_SRC (insn2
)) == MULT
24272 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24275 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24278 con
= DEP_CON (dep
);
24279 if (!NONDEBUG_INSN_P (con
))
24281 insn1
= PATTERN (con
);
24282 if (GET_CODE (insn1
) == PARALLEL
)
24283 insn1
= XVECEXP (insn1
, 0, 0);
24285 if (GET_CODE (insn1
) == SET
24286 && GET_CODE (SET_SRC (insn1
)) == MULT
24287 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24289 sd_iterator_def sd_it1
;
24291 /* Check if there is no other dependee for IMUL. */
24293 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24296 pro
= DEP_PRO (dep1
);
24297 if (!NONDEBUG_INSN_P (pro
))
24310 return issue_rate
; /* Didn't find IMUL producer. */
24312 if (sched_verbose
> 1)
24313 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24314 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24316 /* Put IMUL producer (ready[index]) at the top of ready list. */
24317 insn1
= ready
[index
];
24318 for (i
= index
; i
< n_ready
- 1; i
++)
24319 ready
[i
] = ready
[i
+ 1];
24320 ready
[n_ready
- 1] = insn1
;
24326 ix86_class_likely_spilled_p (reg_class_t
);
24328 /* Returns true if lhs of insn is HW function argument register and set up
24329 is_spilled to true if it is likely spilled HW register. */
24331 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24335 if (!NONDEBUG_INSN_P (insn
))
24337 /* Call instructions are not movable, ignore it. */
24340 insn
= PATTERN (insn
);
24341 if (GET_CODE (insn
) == PARALLEL
)
24342 insn
= XVECEXP (insn
, 0, 0);
24343 if (GET_CODE (insn
) != SET
)
24345 dst
= SET_DEST (insn
);
24346 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24347 && ix86_function_arg_regno_p (REGNO (dst
)))
24349 /* Is it likely spilled HW register? */
24350 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24351 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24352 *is_spilled
= true;
24358 /* Add output dependencies for chain of function adjacent arguments if only
24359 there is a move to likely spilled HW register. Return first argument
24360 if at least one dependence was added or NULL otherwise. */
24362 add_parameter_dependencies (rtx call
, rtx head
)
24366 rtx first_arg
= NULL
;
24367 bool is_spilled
= false;
24369 head
= PREV_INSN (head
);
24371 /* Find nearest to call argument passing instruction. */
24374 last
= PREV_INSN (last
);
24377 if (!NONDEBUG_INSN_P (last
))
24379 if (insn_is_function_arg (last
, &is_spilled
))
24387 insn
= PREV_INSN (last
);
24388 if (!INSN_P (insn
))
24392 if (!NONDEBUG_INSN_P (insn
))
24397 if (insn_is_function_arg (insn
, &is_spilled
))
24399 /* Add output depdendence between two function arguments if chain
24400 of output arguments contains likely spilled HW registers. */
24402 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24403 first_arg
= last
= insn
;
24413 /* Add output or anti dependency from insn to first_arg to restrict its code
24416 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24421 set
= single_set (insn
);
24424 tmp
= SET_DEST (set
);
24427 /* Add output dependency to the first function argument. */
24428 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24431 /* Add anti dependency. */
24432 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24435 /* Avoid cross block motion of function argument through adding dependency
24436 from the first non-jump instruction in bb. */
24438 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24440 rtx insn
= BB_END (bb
);
24444 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24446 rtx set
= single_set (insn
);
24449 avoid_func_arg_motion (arg
, insn
);
24453 if (insn
== BB_HEAD (bb
))
24455 insn
= PREV_INSN (insn
);
24459 /* Hook for pre-reload schedule - avoid motion of function arguments
24460 passed in likely spilled HW registers. */
24462 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24465 rtx first_arg
= NULL
;
24466 if (reload_completed
)
24468 while (head
!= tail
&& DEBUG_INSN_P (head
))
24469 head
= NEXT_INSN (head
);
24470 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24471 if (INSN_P (insn
) && CALL_P (insn
))
24473 first_arg
= add_parameter_dependencies (insn
, head
);
24476 /* Add dependee for first argument to predecessors if only
24477 region contains more than one block. */
24478 basic_block bb
= BLOCK_FOR_INSN (insn
);
24479 int rgn
= CONTAINING_RGN (bb
->index
);
24480 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24481 /* Skip trivial regions and region head blocks that can have
24482 predecessors outside of region. */
24483 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24487 /* Assume that region is SCC, i.e. all immediate predecessors
24488 of non-head block are in the same region. */
24489 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24491 /* Avoid creating of loop-carried dependencies through
24492 using topological odering in region. */
24493 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24494 add_dependee_for_func_arg (first_arg
, e
->src
);
24502 else if (first_arg
)
24503 avoid_func_arg_motion (first_arg
, insn
);
24506 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24507 HW registers to maximum, to schedule them at soon as possible. These are
24508 moves from function argument registers at the top of the function entry
24509 and moves from function return value registers after call. */
24511 ix86_adjust_priority (rtx insn
, int priority
)
24515 if (reload_completed
)
24518 if (!NONDEBUG_INSN_P (insn
))
24521 set
= single_set (insn
);
24524 rtx tmp
= SET_SRC (set
);
24526 && HARD_REGISTER_P (tmp
)
24527 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24528 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24529 return current_sched_info
->sched_max_insns_priority
;
24535 /* Model decoder of Core 2/i7.
24536 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24537 track the instruction fetch block boundaries and make sure that long
24538 (9+ bytes) instructions are assigned to D0. */
24540 /* Maximum length of an insn that can be handled by
24541 a secondary decoder unit. '8' for Core 2/i7. */
24542 static int core2i7_secondary_decoder_max_insn_size
;
24544 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24545 '16' for Core 2/i7. */
24546 static int core2i7_ifetch_block_size
;
24548 /* Maximum number of instructions decoder can handle per cycle.
24549 '6' for Core 2/i7. */
24550 static int core2i7_ifetch_block_max_insns
;
24552 typedef struct ix86_first_cycle_multipass_data_
*
24553 ix86_first_cycle_multipass_data_t
;
24554 typedef const struct ix86_first_cycle_multipass_data_
*
24555 const_ix86_first_cycle_multipass_data_t
;
24557 /* A variable to store target state across calls to max_issue within
24559 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24560 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24562 /* Initialize DATA. */
24564 core2i7_first_cycle_multipass_init (void *_data
)
24566 ix86_first_cycle_multipass_data_t data
24567 = (ix86_first_cycle_multipass_data_t
) _data
;
24569 data
->ifetch_block_len
= 0;
24570 data
->ifetch_block_n_insns
= 0;
24571 data
->ready_try_change
= NULL
;
24572 data
->ready_try_change_size
= 0;
24575 /* Advancing the cycle; reset ifetch block counts. */
24577 core2i7_dfa_post_advance_cycle (void)
24579 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24581 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24583 data
->ifetch_block_len
= 0;
24584 data
->ifetch_block_n_insns
= 0;
24587 static int min_insn_size (rtx
);
24589 /* Filter out insns from ready_try that the core will not be able to issue
24590 on current cycle due to decoder. */
24592 core2i7_first_cycle_multipass_filter_ready_try
24593 (const_ix86_first_cycle_multipass_data_t data
,
24594 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24601 if (ready_try
[n_ready
])
24604 insn
= get_ready_element (n_ready
);
24605 insn_size
= min_insn_size (insn
);
24607 if (/* If this is a too long an insn for a secondary decoder ... */
24608 (!first_cycle_insn_p
24609 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24610 /* ... or it would not fit into the ifetch block ... */
24611 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24612 /* ... or the decoder is full already ... */
24613 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24614 /* ... mask the insn out. */
24616 ready_try
[n_ready
] = 1;
24618 if (data
->ready_try_change
)
24619 bitmap_set_bit (data
->ready_try_change
, n_ready
);
24624 /* Prepare for a new round of multipass lookahead scheduling. */
24626 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24627 bool first_cycle_insn_p
)
24629 ix86_first_cycle_multipass_data_t data
24630 = (ix86_first_cycle_multipass_data_t
) _data
;
24631 const_ix86_first_cycle_multipass_data_t prev_data
24632 = ix86_first_cycle_multipass_data
;
24634 /* Restore the state from the end of the previous round. */
24635 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24636 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24638 /* Filter instructions that cannot be issued on current cycle due to
24639 decoder restrictions. */
24640 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24641 first_cycle_insn_p
);
24644 /* INSN is being issued in current solution. Account for its impact on
24645 the decoder model. */
24647 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24648 rtx insn
, const void *_prev_data
)
24650 ix86_first_cycle_multipass_data_t data
24651 = (ix86_first_cycle_multipass_data_t
) _data
;
24652 const_ix86_first_cycle_multipass_data_t prev_data
24653 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24655 int insn_size
= min_insn_size (insn
);
24657 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24658 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24659 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24660 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24662 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24663 if (!data
->ready_try_change
)
24665 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24666 data
->ready_try_change_size
= n_ready
;
24668 else if (data
->ready_try_change_size
< n_ready
)
24670 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24672 data
->ready_try_change_size
= n_ready
;
24674 bitmap_clear (data
->ready_try_change
);
24676 /* Filter out insns from ready_try that the core will not be able to issue
24677 on current cycle due to decoder. */
24678 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24682 /* Revert the effect on ready_try. */
24684 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24686 int n_ready ATTRIBUTE_UNUSED
)
24688 const_ix86_first_cycle_multipass_data_t data
24689 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24690 unsigned int i
= 0;
24691 sbitmap_iterator sbi
;
24693 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24694 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
24700 /* Save the result of multipass lookahead scheduling for the next round. */
24702 core2i7_first_cycle_multipass_end (const void *_data
)
24704 const_ix86_first_cycle_multipass_data_t data
24705 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24706 ix86_first_cycle_multipass_data_t next_data
24707 = ix86_first_cycle_multipass_data
;
24711 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24712 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24716 /* Deallocate target data. */
24718 core2i7_first_cycle_multipass_fini (void *_data
)
24720 ix86_first_cycle_multipass_data_t data
24721 = (ix86_first_cycle_multipass_data_t
) _data
;
24723 if (data
->ready_try_change
)
24725 sbitmap_free (data
->ready_try_change
);
24726 data
->ready_try_change
= NULL
;
24727 data
->ready_try_change_size
= 0;
24731 /* Prepare for scheduling pass. */
24733 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24734 int verbose ATTRIBUTE_UNUSED
,
24735 int max_uid ATTRIBUTE_UNUSED
)
24737 /* Install scheduling hooks for current CPU. Some of these hooks are used
24738 in time-critical parts of the scheduler, so we only set them up when
24739 they are actually used. */
24742 case PROCESSOR_CORE2_32
:
24743 case PROCESSOR_CORE2_64
:
24744 case PROCESSOR_COREI7_32
:
24745 case PROCESSOR_COREI7_64
:
24746 /* Do not perform multipass scheduling for pre-reload schedule
24747 to save compile time. */
24748 if (reload_completed
)
24750 targetm
.sched
.dfa_post_advance_cycle
24751 = core2i7_dfa_post_advance_cycle
;
24752 targetm
.sched
.first_cycle_multipass_init
24753 = core2i7_first_cycle_multipass_init
;
24754 targetm
.sched
.first_cycle_multipass_begin
24755 = core2i7_first_cycle_multipass_begin
;
24756 targetm
.sched
.first_cycle_multipass_issue
24757 = core2i7_first_cycle_multipass_issue
;
24758 targetm
.sched
.first_cycle_multipass_backtrack
24759 = core2i7_first_cycle_multipass_backtrack
;
24760 targetm
.sched
.first_cycle_multipass_end
24761 = core2i7_first_cycle_multipass_end
;
24762 targetm
.sched
.first_cycle_multipass_fini
24763 = core2i7_first_cycle_multipass_fini
;
24765 /* Set decoder parameters. */
24766 core2i7_secondary_decoder_max_insn_size
= 8;
24767 core2i7_ifetch_block_size
= 16;
24768 core2i7_ifetch_block_max_insns
= 6;
24771 /* ... Fall through ... */
24773 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24774 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24775 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24776 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24777 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24778 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24779 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24785 /* Compute the alignment given to a constant that is being placed in memory.
24786 EXP is the constant and ALIGN is the alignment that the object would
24788 The value of this function is used instead of that alignment to align
24792 ix86_constant_alignment (tree exp
, int align
)
24794 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24795 || TREE_CODE (exp
) == INTEGER_CST
)
24797 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24799 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24802 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24803 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24804 return BITS_PER_WORD
;
24809 /* Compute the alignment for a static variable.
24810 TYPE is the data type, and ALIGN is the alignment that
24811 the object would ordinarily have. The value of this function is used
24812 instead of that alignment to align the object. */
24815 ix86_data_alignment (tree type
, int align
)
24817 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24819 if (AGGREGATE_TYPE_P (type
)
24820 && TYPE_SIZE (type
)
24821 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24822 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24823 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24824 && align
< max_align
)
24827 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24828 to 16byte boundary. */
24831 if (AGGREGATE_TYPE_P (type
)
24832 && TYPE_SIZE (type
)
24833 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24834 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24835 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24839 if (TREE_CODE (type
) == ARRAY_TYPE
)
24841 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24843 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24846 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24849 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24851 if ((TYPE_MODE (type
) == XCmode
24852 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24855 else if ((TREE_CODE (type
) == RECORD_TYPE
24856 || TREE_CODE (type
) == UNION_TYPE
24857 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24858 && TYPE_FIELDS (type
))
24860 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24862 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24865 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24866 || TREE_CODE (type
) == INTEGER_TYPE
)
24868 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24870 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24877 /* Compute the alignment for a local variable or a stack slot. EXP is
24878 the data type or decl itself, MODE is the widest mode available and
24879 ALIGN is the alignment that the object would ordinarily have. The
24880 value of this macro is used instead of that alignment to align the
24884 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24885 unsigned int align
)
24889 if (exp
&& DECL_P (exp
))
24891 type
= TREE_TYPE (exp
);
24900 /* Don't do dynamic stack realignment for long long objects with
24901 -mpreferred-stack-boundary=2. */
24904 && ix86_preferred_stack_boundary
< 64
24905 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24906 && (!type
|| !TYPE_USER_ALIGN (type
))
24907 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24910 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24911 register in MODE. We will return the largest alignment of XF
24915 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
24916 align
= GET_MODE_ALIGNMENT (DFmode
);
24920 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24921 to 16byte boundary. Exact wording is:
24923 An array uses the same alignment as its elements, except that a local or
24924 global array variable of length at least 16 bytes or
24925 a C99 variable-length array variable always has alignment of at least 16 bytes.
24927 This was added to allow use of aligned SSE instructions at arrays. This
24928 rule is meant for static storage (where compiler can not do the analysis
24929 by itself). We follow it for automatic variables only when convenient.
24930 We fully control everything in the function compiled and functions from
24931 other unit can not rely on the alignment.
24933 Exclude va_list type. It is the common case of local array where
24934 we can not benefit from the alignment. */
24935 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
24938 if (AGGREGATE_TYPE_P (type
)
24939 && (va_list_type_node
== NULL_TREE
24940 || (TYPE_MAIN_VARIANT (type
)
24941 != TYPE_MAIN_VARIANT (va_list_type_node
)))
24942 && TYPE_SIZE (type
)
24943 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24944 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
24945 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24948 if (TREE_CODE (type
) == ARRAY_TYPE
)
24950 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24952 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24955 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24957 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24959 if ((TYPE_MODE (type
) == XCmode
24960 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24963 else if ((TREE_CODE (type
) == RECORD_TYPE
24964 || TREE_CODE (type
) == UNION_TYPE
24965 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24966 && TYPE_FIELDS (type
))
24968 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24970 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24973 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24974 || TREE_CODE (type
) == INTEGER_TYPE
)
24977 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24979 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24985 /* Compute the minimum required alignment for dynamic stack realignment
24986 purposes for a local variable, parameter or a stack slot. EXP is
24987 the data type or decl itself, MODE is its mode and ALIGN is the
24988 alignment that the object would ordinarily have. */
24991 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
24992 unsigned int align
)
24996 if (exp
&& DECL_P (exp
))
24998 type
= TREE_TYPE (exp
);
25007 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25010 /* Don't do dynamic stack realignment for long long objects with
25011 -mpreferred-stack-boundary=2. */
25012 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25013 && (!type
|| !TYPE_USER_ALIGN (type
))
25014 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25020 /* Find a location for the static chain incoming to a nested function.
25021 This is a register, unless all free registers are used by arguments. */
25024 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25028 if (!DECL_STATIC_CHAIN (fndecl
))
25033 /* We always use R10 in 64-bit mode. */
25041 /* By default in 32-bit mode we use ECX to pass the static chain. */
25044 fntype
= TREE_TYPE (fndecl
);
25045 ccvt
= ix86_get_callcvt (fntype
);
25046 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
25048 /* Fastcall functions use ecx/edx for arguments, which leaves
25049 us with EAX for the static chain.
25050 Thiscall functions use ecx for arguments, which also
25051 leaves us with EAX for the static chain. */
25054 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25056 /* For regparm 3, we have no free call-clobbered registers in
25057 which to store the static chain. In order to implement this,
25058 we have the trampoline push the static chain to the stack.
25059 However, we can't push a value below the return address when
25060 we call the nested function directly, so we have to use an
25061 alternate entry point. For this we use ESI, and have the
25062 alternate entry point push ESI, so that things appear the
25063 same once we're executing the nested function. */
25066 if (fndecl
== current_function_decl
)
25067 ix86_static_chain_on_stack
= true;
25068 return gen_frame_mem (SImode
,
25069 plus_constant (Pmode
,
25070 arg_pointer_rtx
, -8));
25076 return gen_rtx_REG (Pmode
, regno
);
25079 /* Emit RTL insns to initialize the variable parts of a trampoline.
25080 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25081 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25082 to be passed to the target function. */
25085 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25091 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25097 /* Load the function address to r11. Try to load address using
25098 the shorter movl instead of movabs. We may want to support
25099 movq for kernel mode, but kernel does not use trampolines at
25100 the moment. FNADDR is a 32bit address and may not be in
25101 DImode when ptr_mode == SImode. Always use movl in this
25103 if (ptr_mode
== SImode
25104 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25106 fnaddr
= copy_addr_to_reg (fnaddr
);
25108 mem
= adjust_address (m_tramp
, HImode
, offset
);
25109 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25111 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25112 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25117 mem
= adjust_address (m_tramp
, HImode
, offset
);
25118 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25120 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25121 emit_move_insn (mem
, fnaddr
);
25125 /* Load static chain using movabs to r10. Use the shorter movl
25126 instead of movabs when ptr_mode == SImode. */
25127 if (ptr_mode
== SImode
)
25138 mem
= adjust_address (m_tramp
, HImode
, offset
);
25139 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25141 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25142 emit_move_insn (mem
, chain_value
);
25145 /* Jump to r11; the last (unused) byte is a nop, only there to
25146 pad the write out to a single 32-bit store. */
25147 mem
= adjust_address (m_tramp
, SImode
, offset
);
25148 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25155 /* Depending on the static chain location, either load a register
25156 with a constant, or push the constant to the stack. All of the
25157 instructions are the same size. */
25158 chain
= ix86_static_chain (fndecl
, true);
25161 switch (REGNO (chain
))
25164 opcode
= 0xb8; break;
25166 opcode
= 0xb9; break;
25168 gcc_unreachable ();
25174 mem
= adjust_address (m_tramp
, QImode
, offset
);
25175 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25177 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25178 emit_move_insn (mem
, chain_value
);
25181 mem
= adjust_address (m_tramp
, QImode
, offset
);
25182 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25184 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25186 /* Compute offset from the end of the jmp to the target function.
25187 In the case in which the trampoline stores the static chain on
25188 the stack, we need to skip the first insn which pushes the
25189 (call-saved) register static chain; this push is 1 byte. */
25191 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25192 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25193 offset
- (MEM_P (chain
) ? 1 : 0)),
25194 NULL_RTX
, 1, OPTAB_DIRECT
);
25195 emit_move_insn (mem
, disp
);
25198 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25200 #ifdef HAVE_ENABLE_EXECUTE_STACK
25201 #ifdef CHECK_EXECUTE_STACK_ENABLED
25202 if (CHECK_EXECUTE_STACK_ENABLED
)
25204 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25205 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25209 /* The following file contains several enumerations and data structures
25210 built from the definitions in i386-builtin-types.def. */
25212 #include "i386-builtin-types.inc"
25214 /* Table for the ix86 builtin non-function types. */
25215 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25217 /* Retrieve an element from the above table, building some of
25218 the types lazily. */
25221 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25223 unsigned int index
;
25226 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25228 type
= ix86_builtin_type_tab
[(int) tcode
];
25232 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25233 if (tcode
<= IX86_BT_LAST_VECT
)
25235 enum machine_mode mode
;
25237 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25238 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25239 mode
= ix86_builtin_type_vect_mode
[index
];
25241 type
= build_vector_type_for_mode (itype
, mode
);
25247 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25248 if (tcode
<= IX86_BT_LAST_PTR
)
25249 quals
= TYPE_UNQUALIFIED
;
25251 quals
= TYPE_QUAL_CONST
;
25253 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25254 if (quals
!= TYPE_UNQUALIFIED
)
25255 itype
= build_qualified_type (itype
, quals
);
25257 type
= build_pointer_type (itype
);
25260 ix86_builtin_type_tab
[(int) tcode
] = type
;
25264 /* Table for the ix86 builtin function types. */
25265 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25267 /* Retrieve an element from the above table, building some of
25268 the types lazily. */
25271 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25275 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25277 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25281 if (tcode
<= IX86_BT_LAST_FUNC
)
25283 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25284 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25285 tree rtype
, atype
, args
= void_list_node
;
25288 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25289 for (i
= after
- 1; i
> start
; --i
)
25291 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25292 args
= tree_cons (NULL
, atype
, args
);
25295 type
= build_function_type (rtype
, args
);
25299 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25300 enum ix86_builtin_func_type icode
;
25302 icode
= ix86_builtin_func_alias_base
[index
];
25303 type
= ix86_get_builtin_func_type (icode
);
25306 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25311 /* Codes for all the SSE/MMX builtins. */
25314 IX86_BUILTIN_ADDPS
,
25315 IX86_BUILTIN_ADDSS
,
25316 IX86_BUILTIN_DIVPS
,
25317 IX86_BUILTIN_DIVSS
,
25318 IX86_BUILTIN_MULPS
,
25319 IX86_BUILTIN_MULSS
,
25320 IX86_BUILTIN_SUBPS
,
25321 IX86_BUILTIN_SUBSS
,
25323 IX86_BUILTIN_CMPEQPS
,
25324 IX86_BUILTIN_CMPLTPS
,
25325 IX86_BUILTIN_CMPLEPS
,
25326 IX86_BUILTIN_CMPGTPS
,
25327 IX86_BUILTIN_CMPGEPS
,
25328 IX86_BUILTIN_CMPNEQPS
,
25329 IX86_BUILTIN_CMPNLTPS
,
25330 IX86_BUILTIN_CMPNLEPS
,
25331 IX86_BUILTIN_CMPNGTPS
,
25332 IX86_BUILTIN_CMPNGEPS
,
25333 IX86_BUILTIN_CMPORDPS
,
25334 IX86_BUILTIN_CMPUNORDPS
,
25335 IX86_BUILTIN_CMPEQSS
,
25336 IX86_BUILTIN_CMPLTSS
,
25337 IX86_BUILTIN_CMPLESS
,
25338 IX86_BUILTIN_CMPNEQSS
,
25339 IX86_BUILTIN_CMPNLTSS
,
25340 IX86_BUILTIN_CMPNLESS
,
25341 IX86_BUILTIN_CMPNGTSS
,
25342 IX86_BUILTIN_CMPNGESS
,
25343 IX86_BUILTIN_CMPORDSS
,
25344 IX86_BUILTIN_CMPUNORDSS
,
25346 IX86_BUILTIN_COMIEQSS
,
25347 IX86_BUILTIN_COMILTSS
,
25348 IX86_BUILTIN_COMILESS
,
25349 IX86_BUILTIN_COMIGTSS
,
25350 IX86_BUILTIN_COMIGESS
,
25351 IX86_BUILTIN_COMINEQSS
,
25352 IX86_BUILTIN_UCOMIEQSS
,
25353 IX86_BUILTIN_UCOMILTSS
,
25354 IX86_BUILTIN_UCOMILESS
,
25355 IX86_BUILTIN_UCOMIGTSS
,
25356 IX86_BUILTIN_UCOMIGESS
,
25357 IX86_BUILTIN_UCOMINEQSS
,
25359 IX86_BUILTIN_CVTPI2PS
,
25360 IX86_BUILTIN_CVTPS2PI
,
25361 IX86_BUILTIN_CVTSI2SS
,
25362 IX86_BUILTIN_CVTSI642SS
,
25363 IX86_BUILTIN_CVTSS2SI
,
25364 IX86_BUILTIN_CVTSS2SI64
,
25365 IX86_BUILTIN_CVTTPS2PI
,
25366 IX86_BUILTIN_CVTTSS2SI
,
25367 IX86_BUILTIN_CVTTSS2SI64
,
25369 IX86_BUILTIN_MAXPS
,
25370 IX86_BUILTIN_MAXSS
,
25371 IX86_BUILTIN_MINPS
,
25372 IX86_BUILTIN_MINSS
,
25374 IX86_BUILTIN_LOADUPS
,
25375 IX86_BUILTIN_STOREUPS
,
25376 IX86_BUILTIN_MOVSS
,
25378 IX86_BUILTIN_MOVHLPS
,
25379 IX86_BUILTIN_MOVLHPS
,
25380 IX86_BUILTIN_LOADHPS
,
25381 IX86_BUILTIN_LOADLPS
,
25382 IX86_BUILTIN_STOREHPS
,
25383 IX86_BUILTIN_STORELPS
,
25385 IX86_BUILTIN_MASKMOVQ
,
25386 IX86_BUILTIN_MOVMSKPS
,
25387 IX86_BUILTIN_PMOVMSKB
,
25389 IX86_BUILTIN_MOVNTPS
,
25390 IX86_BUILTIN_MOVNTQ
,
25392 IX86_BUILTIN_LOADDQU
,
25393 IX86_BUILTIN_STOREDQU
,
25395 IX86_BUILTIN_PACKSSWB
,
25396 IX86_BUILTIN_PACKSSDW
,
25397 IX86_BUILTIN_PACKUSWB
,
25399 IX86_BUILTIN_PADDB
,
25400 IX86_BUILTIN_PADDW
,
25401 IX86_BUILTIN_PADDD
,
25402 IX86_BUILTIN_PADDQ
,
25403 IX86_BUILTIN_PADDSB
,
25404 IX86_BUILTIN_PADDSW
,
25405 IX86_BUILTIN_PADDUSB
,
25406 IX86_BUILTIN_PADDUSW
,
25407 IX86_BUILTIN_PSUBB
,
25408 IX86_BUILTIN_PSUBW
,
25409 IX86_BUILTIN_PSUBD
,
25410 IX86_BUILTIN_PSUBQ
,
25411 IX86_BUILTIN_PSUBSB
,
25412 IX86_BUILTIN_PSUBSW
,
25413 IX86_BUILTIN_PSUBUSB
,
25414 IX86_BUILTIN_PSUBUSW
,
25417 IX86_BUILTIN_PANDN
,
25421 IX86_BUILTIN_PAVGB
,
25422 IX86_BUILTIN_PAVGW
,
25424 IX86_BUILTIN_PCMPEQB
,
25425 IX86_BUILTIN_PCMPEQW
,
25426 IX86_BUILTIN_PCMPEQD
,
25427 IX86_BUILTIN_PCMPGTB
,
25428 IX86_BUILTIN_PCMPGTW
,
25429 IX86_BUILTIN_PCMPGTD
,
25431 IX86_BUILTIN_PMADDWD
,
25433 IX86_BUILTIN_PMAXSW
,
25434 IX86_BUILTIN_PMAXUB
,
25435 IX86_BUILTIN_PMINSW
,
25436 IX86_BUILTIN_PMINUB
,
25438 IX86_BUILTIN_PMULHUW
,
25439 IX86_BUILTIN_PMULHW
,
25440 IX86_BUILTIN_PMULLW
,
25442 IX86_BUILTIN_PSADBW
,
25443 IX86_BUILTIN_PSHUFW
,
25445 IX86_BUILTIN_PSLLW
,
25446 IX86_BUILTIN_PSLLD
,
25447 IX86_BUILTIN_PSLLQ
,
25448 IX86_BUILTIN_PSRAW
,
25449 IX86_BUILTIN_PSRAD
,
25450 IX86_BUILTIN_PSRLW
,
25451 IX86_BUILTIN_PSRLD
,
25452 IX86_BUILTIN_PSRLQ
,
25453 IX86_BUILTIN_PSLLWI
,
25454 IX86_BUILTIN_PSLLDI
,
25455 IX86_BUILTIN_PSLLQI
,
25456 IX86_BUILTIN_PSRAWI
,
25457 IX86_BUILTIN_PSRADI
,
25458 IX86_BUILTIN_PSRLWI
,
25459 IX86_BUILTIN_PSRLDI
,
25460 IX86_BUILTIN_PSRLQI
,
25462 IX86_BUILTIN_PUNPCKHBW
,
25463 IX86_BUILTIN_PUNPCKHWD
,
25464 IX86_BUILTIN_PUNPCKHDQ
,
25465 IX86_BUILTIN_PUNPCKLBW
,
25466 IX86_BUILTIN_PUNPCKLWD
,
25467 IX86_BUILTIN_PUNPCKLDQ
,
25469 IX86_BUILTIN_SHUFPS
,
25471 IX86_BUILTIN_RCPPS
,
25472 IX86_BUILTIN_RCPSS
,
25473 IX86_BUILTIN_RSQRTPS
,
25474 IX86_BUILTIN_RSQRTPS_NR
,
25475 IX86_BUILTIN_RSQRTSS
,
25476 IX86_BUILTIN_RSQRTF
,
25477 IX86_BUILTIN_SQRTPS
,
25478 IX86_BUILTIN_SQRTPS_NR
,
25479 IX86_BUILTIN_SQRTSS
,
25481 IX86_BUILTIN_UNPCKHPS
,
25482 IX86_BUILTIN_UNPCKLPS
,
25484 IX86_BUILTIN_ANDPS
,
25485 IX86_BUILTIN_ANDNPS
,
25487 IX86_BUILTIN_XORPS
,
25490 IX86_BUILTIN_LDMXCSR
,
25491 IX86_BUILTIN_STMXCSR
,
25492 IX86_BUILTIN_SFENCE
,
25494 IX86_BUILTIN_FXSAVE
,
25495 IX86_BUILTIN_FXRSTOR
,
25496 IX86_BUILTIN_FXSAVE64
,
25497 IX86_BUILTIN_FXRSTOR64
,
25499 IX86_BUILTIN_XSAVE
,
25500 IX86_BUILTIN_XRSTOR
,
25501 IX86_BUILTIN_XSAVE64
,
25502 IX86_BUILTIN_XRSTOR64
,
25504 IX86_BUILTIN_XSAVEOPT
,
25505 IX86_BUILTIN_XSAVEOPT64
,
25507 /* 3DNow! Original */
25508 IX86_BUILTIN_FEMMS
,
25509 IX86_BUILTIN_PAVGUSB
,
25510 IX86_BUILTIN_PF2ID
,
25511 IX86_BUILTIN_PFACC
,
25512 IX86_BUILTIN_PFADD
,
25513 IX86_BUILTIN_PFCMPEQ
,
25514 IX86_BUILTIN_PFCMPGE
,
25515 IX86_BUILTIN_PFCMPGT
,
25516 IX86_BUILTIN_PFMAX
,
25517 IX86_BUILTIN_PFMIN
,
25518 IX86_BUILTIN_PFMUL
,
25519 IX86_BUILTIN_PFRCP
,
25520 IX86_BUILTIN_PFRCPIT1
,
25521 IX86_BUILTIN_PFRCPIT2
,
25522 IX86_BUILTIN_PFRSQIT1
,
25523 IX86_BUILTIN_PFRSQRT
,
25524 IX86_BUILTIN_PFSUB
,
25525 IX86_BUILTIN_PFSUBR
,
25526 IX86_BUILTIN_PI2FD
,
25527 IX86_BUILTIN_PMULHRW
,
25529 /* 3DNow! Athlon Extensions */
25530 IX86_BUILTIN_PF2IW
,
25531 IX86_BUILTIN_PFNACC
,
25532 IX86_BUILTIN_PFPNACC
,
25533 IX86_BUILTIN_PI2FW
,
25534 IX86_BUILTIN_PSWAPDSI
,
25535 IX86_BUILTIN_PSWAPDSF
,
25538 IX86_BUILTIN_ADDPD
,
25539 IX86_BUILTIN_ADDSD
,
25540 IX86_BUILTIN_DIVPD
,
25541 IX86_BUILTIN_DIVSD
,
25542 IX86_BUILTIN_MULPD
,
25543 IX86_BUILTIN_MULSD
,
25544 IX86_BUILTIN_SUBPD
,
25545 IX86_BUILTIN_SUBSD
,
25547 IX86_BUILTIN_CMPEQPD
,
25548 IX86_BUILTIN_CMPLTPD
,
25549 IX86_BUILTIN_CMPLEPD
,
25550 IX86_BUILTIN_CMPGTPD
,
25551 IX86_BUILTIN_CMPGEPD
,
25552 IX86_BUILTIN_CMPNEQPD
,
25553 IX86_BUILTIN_CMPNLTPD
,
25554 IX86_BUILTIN_CMPNLEPD
,
25555 IX86_BUILTIN_CMPNGTPD
,
25556 IX86_BUILTIN_CMPNGEPD
,
25557 IX86_BUILTIN_CMPORDPD
,
25558 IX86_BUILTIN_CMPUNORDPD
,
25559 IX86_BUILTIN_CMPEQSD
,
25560 IX86_BUILTIN_CMPLTSD
,
25561 IX86_BUILTIN_CMPLESD
,
25562 IX86_BUILTIN_CMPNEQSD
,
25563 IX86_BUILTIN_CMPNLTSD
,
25564 IX86_BUILTIN_CMPNLESD
,
25565 IX86_BUILTIN_CMPORDSD
,
25566 IX86_BUILTIN_CMPUNORDSD
,
25568 IX86_BUILTIN_COMIEQSD
,
25569 IX86_BUILTIN_COMILTSD
,
25570 IX86_BUILTIN_COMILESD
,
25571 IX86_BUILTIN_COMIGTSD
,
25572 IX86_BUILTIN_COMIGESD
,
25573 IX86_BUILTIN_COMINEQSD
,
25574 IX86_BUILTIN_UCOMIEQSD
,
25575 IX86_BUILTIN_UCOMILTSD
,
25576 IX86_BUILTIN_UCOMILESD
,
25577 IX86_BUILTIN_UCOMIGTSD
,
25578 IX86_BUILTIN_UCOMIGESD
,
25579 IX86_BUILTIN_UCOMINEQSD
,
25581 IX86_BUILTIN_MAXPD
,
25582 IX86_BUILTIN_MAXSD
,
25583 IX86_BUILTIN_MINPD
,
25584 IX86_BUILTIN_MINSD
,
25586 IX86_BUILTIN_ANDPD
,
25587 IX86_BUILTIN_ANDNPD
,
25589 IX86_BUILTIN_XORPD
,
25591 IX86_BUILTIN_SQRTPD
,
25592 IX86_BUILTIN_SQRTSD
,
25594 IX86_BUILTIN_UNPCKHPD
,
25595 IX86_BUILTIN_UNPCKLPD
,
25597 IX86_BUILTIN_SHUFPD
,
25599 IX86_BUILTIN_LOADUPD
,
25600 IX86_BUILTIN_STOREUPD
,
25601 IX86_BUILTIN_MOVSD
,
25603 IX86_BUILTIN_LOADHPD
,
25604 IX86_BUILTIN_LOADLPD
,
25606 IX86_BUILTIN_CVTDQ2PD
,
25607 IX86_BUILTIN_CVTDQ2PS
,
25609 IX86_BUILTIN_CVTPD2DQ
,
25610 IX86_BUILTIN_CVTPD2PI
,
25611 IX86_BUILTIN_CVTPD2PS
,
25612 IX86_BUILTIN_CVTTPD2DQ
,
25613 IX86_BUILTIN_CVTTPD2PI
,
25615 IX86_BUILTIN_CVTPI2PD
,
25616 IX86_BUILTIN_CVTSI2SD
,
25617 IX86_BUILTIN_CVTSI642SD
,
25619 IX86_BUILTIN_CVTSD2SI
,
25620 IX86_BUILTIN_CVTSD2SI64
,
25621 IX86_BUILTIN_CVTSD2SS
,
25622 IX86_BUILTIN_CVTSS2SD
,
25623 IX86_BUILTIN_CVTTSD2SI
,
25624 IX86_BUILTIN_CVTTSD2SI64
,
25626 IX86_BUILTIN_CVTPS2DQ
,
25627 IX86_BUILTIN_CVTPS2PD
,
25628 IX86_BUILTIN_CVTTPS2DQ
,
25630 IX86_BUILTIN_MOVNTI
,
25631 IX86_BUILTIN_MOVNTI64
,
25632 IX86_BUILTIN_MOVNTPD
,
25633 IX86_BUILTIN_MOVNTDQ
,
25635 IX86_BUILTIN_MOVQ128
,
25638 IX86_BUILTIN_MASKMOVDQU
,
25639 IX86_BUILTIN_MOVMSKPD
,
25640 IX86_BUILTIN_PMOVMSKB128
,
25642 IX86_BUILTIN_PACKSSWB128
,
25643 IX86_BUILTIN_PACKSSDW128
,
25644 IX86_BUILTIN_PACKUSWB128
,
25646 IX86_BUILTIN_PADDB128
,
25647 IX86_BUILTIN_PADDW128
,
25648 IX86_BUILTIN_PADDD128
,
25649 IX86_BUILTIN_PADDQ128
,
25650 IX86_BUILTIN_PADDSB128
,
25651 IX86_BUILTIN_PADDSW128
,
25652 IX86_BUILTIN_PADDUSB128
,
25653 IX86_BUILTIN_PADDUSW128
,
25654 IX86_BUILTIN_PSUBB128
,
25655 IX86_BUILTIN_PSUBW128
,
25656 IX86_BUILTIN_PSUBD128
,
25657 IX86_BUILTIN_PSUBQ128
,
25658 IX86_BUILTIN_PSUBSB128
,
25659 IX86_BUILTIN_PSUBSW128
,
25660 IX86_BUILTIN_PSUBUSB128
,
25661 IX86_BUILTIN_PSUBUSW128
,
25663 IX86_BUILTIN_PAND128
,
25664 IX86_BUILTIN_PANDN128
,
25665 IX86_BUILTIN_POR128
,
25666 IX86_BUILTIN_PXOR128
,
25668 IX86_BUILTIN_PAVGB128
,
25669 IX86_BUILTIN_PAVGW128
,
25671 IX86_BUILTIN_PCMPEQB128
,
25672 IX86_BUILTIN_PCMPEQW128
,
25673 IX86_BUILTIN_PCMPEQD128
,
25674 IX86_BUILTIN_PCMPGTB128
,
25675 IX86_BUILTIN_PCMPGTW128
,
25676 IX86_BUILTIN_PCMPGTD128
,
25678 IX86_BUILTIN_PMADDWD128
,
25680 IX86_BUILTIN_PMAXSW128
,
25681 IX86_BUILTIN_PMAXUB128
,
25682 IX86_BUILTIN_PMINSW128
,
25683 IX86_BUILTIN_PMINUB128
,
25685 IX86_BUILTIN_PMULUDQ
,
25686 IX86_BUILTIN_PMULUDQ128
,
25687 IX86_BUILTIN_PMULHUW128
,
25688 IX86_BUILTIN_PMULHW128
,
25689 IX86_BUILTIN_PMULLW128
,
25691 IX86_BUILTIN_PSADBW128
,
25692 IX86_BUILTIN_PSHUFHW
,
25693 IX86_BUILTIN_PSHUFLW
,
25694 IX86_BUILTIN_PSHUFD
,
25696 IX86_BUILTIN_PSLLDQI128
,
25697 IX86_BUILTIN_PSLLWI128
,
25698 IX86_BUILTIN_PSLLDI128
,
25699 IX86_BUILTIN_PSLLQI128
,
25700 IX86_BUILTIN_PSRAWI128
,
25701 IX86_BUILTIN_PSRADI128
,
25702 IX86_BUILTIN_PSRLDQI128
,
25703 IX86_BUILTIN_PSRLWI128
,
25704 IX86_BUILTIN_PSRLDI128
,
25705 IX86_BUILTIN_PSRLQI128
,
25707 IX86_BUILTIN_PSLLDQ128
,
25708 IX86_BUILTIN_PSLLW128
,
25709 IX86_BUILTIN_PSLLD128
,
25710 IX86_BUILTIN_PSLLQ128
,
25711 IX86_BUILTIN_PSRAW128
,
25712 IX86_BUILTIN_PSRAD128
,
25713 IX86_BUILTIN_PSRLW128
,
25714 IX86_BUILTIN_PSRLD128
,
25715 IX86_BUILTIN_PSRLQ128
,
25717 IX86_BUILTIN_PUNPCKHBW128
,
25718 IX86_BUILTIN_PUNPCKHWD128
,
25719 IX86_BUILTIN_PUNPCKHDQ128
,
25720 IX86_BUILTIN_PUNPCKHQDQ128
,
25721 IX86_BUILTIN_PUNPCKLBW128
,
25722 IX86_BUILTIN_PUNPCKLWD128
,
25723 IX86_BUILTIN_PUNPCKLDQ128
,
25724 IX86_BUILTIN_PUNPCKLQDQ128
,
25726 IX86_BUILTIN_CLFLUSH
,
25727 IX86_BUILTIN_MFENCE
,
25728 IX86_BUILTIN_LFENCE
,
25729 IX86_BUILTIN_PAUSE
,
25731 IX86_BUILTIN_BSRSI
,
25732 IX86_BUILTIN_BSRDI
,
25733 IX86_BUILTIN_RDPMC
,
25734 IX86_BUILTIN_RDTSC
,
25735 IX86_BUILTIN_RDTSCP
,
25736 IX86_BUILTIN_ROLQI
,
25737 IX86_BUILTIN_ROLHI
,
25738 IX86_BUILTIN_RORQI
,
25739 IX86_BUILTIN_RORHI
,
25742 IX86_BUILTIN_ADDSUBPS
,
25743 IX86_BUILTIN_HADDPS
,
25744 IX86_BUILTIN_HSUBPS
,
25745 IX86_BUILTIN_MOVSHDUP
,
25746 IX86_BUILTIN_MOVSLDUP
,
25747 IX86_BUILTIN_ADDSUBPD
,
25748 IX86_BUILTIN_HADDPD
,
25749 IX86_BUILTIN_HSUBPD
,
25750 IX86_BUILTIN_LDDQU
,
25752 IX86_BUILTIN_MONITOR
,
25753 IX86_BUILTIN_MWAIT
,
25756 IX86_BUILTIN_PHADDW
,
25757 IX86_BUILTIN_PHADDD
,
25758 IX86_BUILTIN_PHADDSW
,
25759 IX86_BUILTIN_PHSUBW
,
25760 IX86_BUILTIN_PHSUBD
,
25761 IX86_BUILTIN_PHSUBSW
,
25762 IX86_BUILTIN_PMADDUBSW
,
25763 IX86_BUILTIN_PMULHRSW
,
25764 IX86_BUILTIN_PSHUFB
,
25765 IX86_BUILTIN_PSIGNB
,
25766 IX86_BUILTIN_PSIGNW
,
25767 IX86_BUILTIN_PSIGND
,
25768 IX86_BUILTIN_PALIGNR
,
25769 IX86_BUILTIN_PABSB
,
25770 IX86_BUILTIN_PABSW
,
25771 IX86_BUILTIN_PABSD
,
25773 IX86_BUILTIN_PHADDW128
,
25774 IX86_BUILTIN_PHADDD128
,
25775 IX86_BUILTIN_PHADDSW128
,
25776 IX86_BUILTIN_PHSUBW128
,
25777 IX86_BUILTIN_PHSUBD128
,
25778 IX86_BUILTIN_PHSUBSW128
,
25779 IX86_BUILTIN_PMADDUBSW128
,
25780 IX86_BUILTIN_PMULHRSW128
,
25781 IX86_BUILTIN_PSHUFB128
,
25782 IX86_BUILTIN_PSIGNB128
,
25783 IX86_BUILTIN_PSIGNW128
,
25784 IX86_BUILTIN_PSIGND128
,
25785 IX86_BUILTIN_PALIGNR128
,
25786 IX86_BUILTIN_PABSB128
,
25787 IX86_BUILTIN_PABSW128
,
25788 IX86_BUILTIN_PABSD128
,
25790 /* AMDFAM10 - SSE4A New Instructions. */
25791 IX86_BUILTIN_MOVNTSD
,
25792 IX86_BUILTIN_MOVNTSS
,
25793 IX86_BUILTIN_EXTRQI
,
25794 IX86_BUILTIN_EXTRQ
,
25795 IX86_BUILTIN_INSERTQI
,
25796 IX86_BUILTIN_INSERTQ
,
25799 IX86_BUILTIN_BLENDPD
,
25800 IX86_BUILTIN_BLENDPS
,
25801 IX86_BUILTIN_BLENDVPD
,
25802 IX86_BUILTIN_BLENDVPS
,
25803 IX86_BUILTIN_PBLENDVB128
,
25804 IX86_BUILTIN_PBLENDW128
,
25809 IX86_BUILTIN_INSERTPS128
,
25811 IX86_BUILTIN_MOVNTDQA
,
25812 IX86_BUILTIN_MPSADBW128
,
25813 IX86_BUILTIN_PACKUSDW128
,
25814 IX86_BUILTIN_PCMPEQQ
,
25815 IX86_BUILTIN_PHMINPOSUW128
,
25817 IX86_BUILTIN_PMAXSB128
,
25818 IX86_BUILTIN_PMAXSD128
,
25819 IX86_BUILTIN_PMAXUD128
,
25820 IX86_BUILTIN_PMAXUW128
,
25822 IX86_BUILTIN_PMINSB128
,
25823 IX86_BUILTIN_PMINSD128
,
25824 IX86_BUILTIN_PMINUD128
,
25825 IX86_BUILTIN_PMINUW128
,
25827 IX86_BUILTIN_PMOVSXBW128
,
25828 IX86_BUILTIN_PMOVSXBD128
,
25829 IX86_BUILTIN_PMOVSXBQ128
,
25830 IX86_BUILTIN_PMOVSXWD128
,
25831 IX86_BUILTIN_PMOVSXWQ128
,
25832 IX86_BUILTIN_PMOVSXDQ128
,
25834 IX86_BUILTIN_PMOVZXBW128
,
25835 IX86_BUILTIN_PMOVZXBD128
,
25836 IX86_BUILTIN_PMOVZXBQ128
,
25837 IX86_BUILTIN_PMOVZXWD128
,
25838 IX86_BUILTIN_PMOVZXWQ128
,
25839 IX86_BUILTIN_PMOVZXDQ128
,
25841 IX86_BUILTIN_PMULDQ128
,
25842 IX86_BUILTIN_PMULLD128
,
25844 IX86_BUILTIN_ROUNDSD
,
25845 IX86_BUILTIN_ROUNDSS
,
25847 IX86_BUILTIN_ROUNDPD
,
25848 IX86_BUILTIN_ROUNDPS
,
25850 IX86_BUILTIN_FLOORPD
,
25851 IX86_BUILTIN_CEILPD
,
25852 IX86_BUILTIN_TRUNCPD
,
25853 IX86_BUILTIN_RINTPD
,
25854 IX86_BUILTIN_ROUNDPD_AZ
,
25856 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25857 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25858 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25860 IX86_BUILTIN_FLOORPS
,
25861 IX86_BUILTIN_CEILPS
,
25862 IX86_BUILTIN_TRUNCPS
,
25863 IX86_BUILTIN_RINTPS
,
25864 IX86_BUILTIN_ROUNDPS_AZ
,
25866 IX86_BUILTIN_FLOORPS_SFIX
,
25867 IX86_BUILTIN_CEILPS_SFIX
,
25868 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25870 IX86_BUILTIN_PTESTZ
,
25871 IX86_BUILTIN_PTESTC
,
25872 IX86_BUILTIN_PTESTNZC
,
25874 IX86_BUILTIN_VEC_INIT_V2SI
,
25875 IX86_BUILTIN_VEC_INIT_V4HI
,
25876 IX86_BUILTIN_VEC_INIT_V8QI
,
25877 IX86_BUILTIN_VEC_EXT_V2DF
,
25878 IX86_BUILTIN_VEC_EXT_V2DI
,
25879 IX86_BUILTIN_VEC_EXT_V4SF
,
25880 IX86_BUILTIN_VEC_EXT_V4SI
,
25881 IX86_BUILTIN_VEC_EXT_V8HI
,
25882 IX86_BUILTIN_VEC_EXT_V2SI
,
25883 IX86_BUILTIN_VEC_EXT_V4HI
,
25884 IX86_BUILTIN_VEC_EXT_V16QI
,
25885 IX86_BUILTIN_VEC_SET_V2DI
,
25886 IX86_BUILTIN_VEC_SET_V4SF
,
25887 IX86_BUILTIN_VEC_SET_V4SI
,
25888 IX86_BUILTIN_VEC_SET_V8HI
,
25889 IX86_BUILTIN_VEC_SET_V4HI
,
25890 IX86_BUILTIN_VEC_SET_V16QI
,
25892 IX86_BUILTIN_VEC_PACK_SFIX
,
25893 IX86_BUILTIN_VEC_PACK_SFIX256
,
25896 IX86_BUILTIN_CRC32QI
,
25897 IX86_BUILTIN_CRC32HI
,
25898 IX86_BUILTIN_CRC32SI
,
25899 IX86_BUILTIN_CRC32DI
,
25901 IX86_BUILTIN_PCMPESTRI128
,
25902 IX86_BUILTIN_PCMPESTRM128
,
25903 IX86_BUILTIN_PCMPESTRA128
,
25904 IX86_BUILTIN_PCMPESTRC128
,
25905 IX86_BUILTIN_PCMPESTRO128
,
25906 IX86_BUILTIN_PCMPESTRS128
,
25907 IX86_BUILTIN_PCMPESTRZ128
,
25908 IX86_BUILTIN_PCMPISTRI128
,
25909 IX86_BUILTIN_PCMPISTRM128
,
25910 IX86_BUILTIN_PCMPISTRA128
,
25911 IX86_BUILTIN_PCMPISTRC128
,
25912 IX86_BUILTIN_PCMPISTRO128
,
25913 IX86_BUILTIN_PCMPISTRS128
,
25914 IX86_BUILTIN_PCMPISTRZ128
,
25916 IX86_BUILTIN_PCMPGTQ
,
25918 /* AES instructions */
25919 IX86_BUILTIN_AESENC128
,
25920 IX86_BUILTIN_AESENCLAST128
,
25921 IX86_BUILTIN_AESDEC128
,
25922 IX86_BUILTIN_AESDECLAST128
,
25923 IX86_BUILTIN_AESIMC128
,
25924 IX86_BUILTIN_AESKEYGENASSIST128
,
25926 /* PCLMUL instruction */
25927 IX86_BUILTIN_PCLMULQDQ128
,
25930 IX86_BUILTIN_ADDPD256
,
25931 IX86_BUILTIN_ADDPS256
,
25932 IX86_BUILTIN_ADDSUBPD256
,
25933 IX86_BUILTIN_ADDSUBPS256
,
25934 IX86_BUILTIN_ANDPD256
,
25935 IX86_BUILTIN_ANDPS256
,
25936 IX86_BUILTIN_ANDNPD256
,
25937 IX86_BUILTIN_ANDNPS256
,
25938 IX86_BUILTIN_BLENDPD256
,
25939 IX86_BUILTIN_BLENDPS256
,
25940 IX86_BUILTIN_BLENDVPD256
,
25941 IX86_BUILTIN_BLENDVPS256
,
25942 IX86_BUILTIN_DIVPD256
,
25943 IX86_BUILTIN_DIVPS256
,
25944 IX86_BUILTIN_DPPS256
,
25945 IX86_BUILTIN_HADDPD256
,
25946 IX86_BUILTIN_HADDPS256
,
25947 IX86_BUILTIN_HSUBPD256
,
25948 IX86_BUILTIN_HSUBPS256
,
25949 IX86_BUILTIN_MAXPD256
,
25950 IX86_BUILTIN_MAXPS256
,
25951 IX86_BUILTIN_MINPD256
,
25952 IX86_BUILTIN_MINPS256
,
25953 IX86_BUILTIN_MULPD256
,
25954 IX86_BUILTIN_MULPS256
,
25955 IX86_BUILTIN_ORPD256
,
25956 IX86_BUILTIN_ORPS256
,
25957 IX86_BUILTIN_SHUFPD256
,
25958 IX86_BUILTIN_SHUFPS256
,
25959 IX86_BUILTIN_SUBPD256
,
25960 IX86_BUILTIN_SUBPS256
,
25961 IX86_BUILTIN_XORPD256
,
25962 IX86_BUILTIN_XORPS256
,
25963 IX86_BUILTIN_CMPSD
,
25964 IX86_BUILTIN_CMPSS
,
25965 IX86_BUILTIN_CMPPD
,
25966 IX86_BUILTIN_CMPPS
,
25967 IX86_BUILTIN_CMPPD256
,
25968 IX86_BUILTIN_CMPPS256
,
25969 IX86_BUILTIN_CVTDQ2PD256
,
25970 IX86_BUILTIN_CVTDQ2PS256
,
25971 IX86_BUILTIN_CVTPD2PS256
,
25972 IX86_BUILTIN_CVTPS2DQ256
,
25973 IX86_BUILTIN_CVTPS2PD256
,
25974 IX86_BUILTIN_CVTTPD2DQ256
,
25975 IX86_BUILTIN_CVTPD2DQ256
,
25976 IX86_BUILTIN_CVTTPS2DQ256
,
25977 IX86_BUILTIN_EXTRACTF128PD256
,
25978 IX86_BUILTIN_EXTRACTF128PS256
,
25979 IX86_BUILTIN_EXTRACTF128SI256
,
25980 IX86_BUILTIN_VZEROALL
,
25981 IX86_BUILTIN_VZEROUPPER
,
25982 IX86_BUILTIN_VPERMILVARPD
,
25983 IX86_BUILTIN_VPERMILVARPS
,
25984 IX86_BUILTIN_VPERMILVARPD256
,
25985 IX86_BUILTIN_VPERMILVARPS256
,
25986 IX86_BUILTIN_VPERMILPD
,
25987 IX86_BUILTIN_VPERMILPS
,
25988 IX86_BUILTIN_VPERMILPD256
,
25989 IX86_BUILTIN_VPERMILPS256
,
25990 IX86_BUILTIN_VPERMIL2PD
,
25991 IX86_BUILTIN_VPERMIL2PS
,
25992 IX86_BUILTIN_VPERMIL2PD256
,
25993 IX86_BUILTIN_VPERMIL2PS256
,
25994 IX86_BUILTIN_VPERM2F128PD256
,
25995 IX86_BUILTIN_VPERM2F128PS256
,
25996 IX86_BUILTIN_VPERM2F128SI256
,
25997 IX86_BUILTIN_VBROADCASTSS
,
25998 IX86_BUILTIN_VBROADCASTSD256
,
25999 IX86_BUILTIN_VBROADCASTSS256
,
26000 IX86_BUILTIN_VBROADCASTPD256
,
26001 IX86_BUILTIN_VBROADCASTPS256
,
26002 IX86_BUILTIN_VINSERTF128PD256
,
26003 IX86_BUILTIN_VINSERTF128PS256
,
26004 IX86_BUILTIN_VINSERTF128SI256
,
26005 IX86_BUILTIN_LOADUPD256
,
26006 IX86_BUILTIN_LOADUPS256
,
26007 IX86_BUILTIN_STOREUPD256
,
26008 IX86_BUILTIN_STOREUPS256
,
26009 IX86_BUILTIN_LDDQU256
,
26010 IX86_BUILTIN_MOVNTDQ256
,
26011 IX86_BUILTIN_MOVNTPD256
,
26012 IX86_BUILTIN_MOVNTPS256
,
26013 IX86_BUILTIN_LOADDQU256
,
26014 IX86_BUILTIN_STOREDQU256
,
26015 IX86_BUILTIN_MASKLOADPD
,
26016 IX86_BUILTIN_MASKLOADPS
,
26017 IX86_BUILTIN_MASKSTOREPD
,
26018 IX86_BUILTIN_MASKSTOREPS
,
26019 IX86_BUILTIN_MASKLOADPD256
,
26020 IX86_BUILTIN_MASKLOADPS256
,
26021 IX86_BUILTIN_MASKSTOREPD256
,
26022 IX86_BUILTIN_MASKSTOREPS256
,
26023 IX86_BUILTIN_MOVSHDUP256
,
26024 IX86_BUILTIN_MOVSLDUP256
,
26025 IX86_BUILTIN_MOVDDUP256
,
26027 IX86_BUILTIN_SQRTPD256
,
26028 IX86_BUILTIN_SQRTPS256
,
26029 IX86_BUILTIN_SQRTPS_NR256
,
26030 IX86_BUILTIN_RSQRTPS256
,
26031 IX86_BUILTIN_RSQRTPS_NR256
,
26033 IX86_BUILTIN_RCPPS256
,
26035 IX86_BUILTIN_ROUNDPD256
,
26036 IX86_BUILTIN_ROUNDPS256
,
26038 IX86_BUILTIN_FLOORPD256
,
26039 IX86_BUILTIN_CEILPD256
,
26040 IX86_BUILTIN_TRUNCPD256
,
26041 IX86_BUILTIN_RINTPD256
,
26042 IX86_BUILTIN_ROUNDPD_AZ256
,
26044 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26045 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26046 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26048 IX86_BUILTIN_FLOORPS256
,
26049 IX86_BUILTIN_CEILPS256
,
26050 IX86_BUILTIN_TRUNCPS256
,
26051 IX86_BUILTIN_RINTPS256
,
26052 IX86_BUILTIN_ROUNDPS_AZ256
,
26054 IX86_BUILTIN_FLOORPS_SFIX256
,
26055 IX86_BUILTIN_CEILPS_SFIX256
,
26056 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26058 IX86_BUILTIN_UNPCKHPD256
,
26059 IX86_BUILTIN_UNPCKLPD256
,
26060 IX86_BUILTIN_UNPCKHPS256
,
26061 IX86_BUILTIN_UNPCKLPS256
,
26063 IX86_BUILTIN_SI256_SI
,
26064 IX86_BUILTIN_PS256_PS
,
26065 IX86_BUILTIN_PD256_PD
,
26066 IX86_BUILTIN_SI_SI256
,
26067 IX86_BUILTIN_PS_PS256
,
26068 IX86_BUILTIN_PD_PD256
,
26070 IX86_BUILTIN_VTESTZPD
,
26071 IX86_BUILTIN_VTESTCPD
,
26072 IX86_BUILTIN_VTESTNZCPD
,
26073 IX86_BUILTIN_VTESTZPS
,
26074 IX86_BUILTIN_VTESTCPS
,
26075 IX86_BUILTIN_VTESTNZCPS
,
26076 IX86_BUILTIN_VTESTZPD256
,
26077 IX86_BUILTIN_VTESTCPD256
,
26078 IX86_BUILTIN_VTESTNZCPD256
,
26079 IX86_BUILTIN_VTESTZPS256
,
26080 IX86_BUILTIN_VTESTCPS256
,
26081 IX86_BUILTIN_VTESTNZCPS256
,
26082 IX86_BUILTIN_PTESTZ256
,
26083 IX86_BUILTIN_PTESTC256
,
26084 IX86_BUILTIN_PTESTNZC256
,
26086 IX86_BUILTIN_MOVMSKPD256
,
26087 IX86_BUILTIN_MOVMSKPS256
,
26090 IX86_BUILTIN_MPSADBW256
,
26091 IX86_BUILTIN_PABSB256
,
26092 IX86_BUILTIN_PABSW256
,
26093 IX86_BUILTIN_PABSD256
,
26094 IX86_BUILTIN_PACKSSDW256
,
26095 IX86_BUILTIN_PACKSSWB256
,
26096 IX86_BUILTIN_PACKUSDW256
,
26097 IX86_BUILTIN_PACKUSWB256
,
26098 IX86_BUILTIN_PADDB256
,
26099 IX86_BUILTIN_PADDW256
,
26100 IX86_BUILTIN_PADDD256
,
26101 IX86_BUILTIN_PADDQ256
,
26102 IX86_BUILTIN_PADDSB256
,
26103 IX86_BUILTIN_PADDSW256
,
26104 IX86_BUILTIN_PADDUSB256
,
26105 IX86_BUILTIN_PADDUSW256
,
26106 IX86_BUILTIN_PALIGNR256
,
26107 IX86_BUILTIN_AND256I
,
26108 IX86_BUILTIN_ANDNOT256I
,
26109 IX86_BUILTIN_PAVGB256
,
26110 IX86_BUILTIN_PAVGW256
,
26111 IX86_BUILTIN_PBLENDVB256
,
26112 IX86_BUILTIN_PBLENDVW256
,
26113 IX86_BUILTIN_PCMPEQB256
,
26114 IX86_BUILTIN_PCMPEQW256
,
26115 IX86_BUILTIN_PCMPEQD256
,
26116 IX86_BUILTIN_PCMPEQQ256
,
26117 IX86_BUILTIN_PCMPGTB256
,
26118 IX86_BUILTIN_PCMPGTW256
,
26119 IX86_BUILTIN_PCMPGTD256
,
26120 IX86_BUILTIN_PCMPGTQ256
,
26121 IX86_BUILTIN_PHADDW256
,
26122 IX86_BUILTIN_PHADDD256
,
26123 IX86_BUILTIN_PHADDSW256
,
26124 IX86_BUILTIN_PHSUBW256
,
26125 IX86_BUILTIN_PHSUBD256
,
26126 IX86_BUILTIN_PHSUBSW256
,
26127 IX86_BUILTIN_PMADDUBSW256
,
26128 IX86_BUILTIN_PMADDWD256
,
26129 IX86_BUILTIN_PMAXSB256
,
26130 IX86_BUILTIN_PMAXSW256
,
26131 IX86_BUILTIN_PMAXSD256
,
26132 IX86_BUILTIN_PMAXUB256
,
26133 IX86_BUILTIN_PMAXUW256
,
26134 IX86_BUILTIN_PMAXUD256
,
26135 IX86_BUILTIN_PMINSB256
,
26136 IX86_BUILTIN_PMINSW256
,
26137 IX86_BUILTIN_PMINSD256
,
26138 IX86_BUILTIN_PMINUB256
,
26139 IX86_BUILTIN_PMINUW256
,
26140 IX86_BUILTIN_PMINUD256
,
26141 IX86_BUILTIN_PMOVMSKB256
,
26142 IX86_BUILTIN_PMOVSXBW256
,
26143 IX86_BUILTIN_PMOVSXBD256
,
26144 IX86_BUILTIN_PMOVSXBQ256
,
26145 IX86_BUILTIN_PMOVSXWD256
,
26146 IX86_BUILTIN_PMOVSXWQ256
,
26147 IX86_BUILTIN_PMOVSXDQ256
,
26148 IX86_BUILTIN_PMOVZXBW256
,
26149 IX86_BUILTIN_PMOVZXBD256
,
26150 IX86_BUILTIN_PMOVZXBQ256
,
26151 IX86_BUILTIN_PMOVZXWD256
,
26152 IX86_BUILTIN_PMOVZXWQ256
,
26153 IX86_BUILTIN_PMOVZXDQ256
,
26154 IX86_BUILTIN_PMULDQ256
,
26155 IX86_BUILTIN_PMULHRSW256
,
26156 IX86_BUILTIN_PMULHUW256
,
26157 IX86_BUILTIN_PMULHW256
,
26158 IX86_BUILTIN_PMULLW256
,
26159 IX86_BUILTIN_PMULLD256
,
26160 IX86_BUILTIN_PMULUDQ256
,
26161 IX86_BUILTIN_POR256
,
26162 IX86_BUILTIN_PSADBW256
,
26163 IX86_BUILTIN_PSHUFB256
,
26164 IX86_BUILTIN_PSHUFD256
,
26165 IX86_BUILTIN_PSHUFHW256
,
26166 IX86_BUILTIN_PSHUFLW256
,
26167 IX86_BUILTIN_PSIGNB256
,
26168 IX86_BUILTIN_PSIGNW256
,
26169 IX86_BUILTIN_PSIGND256
,
26170 IX86_BUILTIN_PSLLDQI256
,
26171 IX86_BUILTIN_PSLLWI256
,
26172 IX86_BUILTIN_PSLLW256
,
26173 IX86_BUILTIN_PSLLDI256
,
26174 IX86_BUILTIN_PSLLD256
,
26175 IX86_BUILTIN_PSLLQI256
,
26176 IX86_BUILTIN_PSLLQ256
,
26177 IX86_BUILTIN_PSRAWI256
,
26178 IX86_BUILTIN_PSRAW256
,
26179 IX86_BUILTIN_PSRADI256
,
26180 IX86_BUILTIN_PSRAD256
,
26181 IX86_BUILTIN_PSRLDQI256
,
26182 IX86_BUILTIN_PSRLWI256
,
26183 IX86_BUILTIN_PSRLW256
,
26184 IX86_BUILTIN_PSRLDI256
,
26185 IX86_BUILTIN_PSRLD256
,
26186 IX86_BUILTIN_PSRLQI256
,
26187 IX86_BUILTIN_PSRLQ256
,
26188 IX86_BUILTIN_PSUBB256
,
26189 IX86_BUILTIN_PSUBW256
,
26190 IX86_BUILTIN_PSUBD256
,
26191 IX86_BUILTIN_PSUBQ256
,
26192 IX86_BUILTIN_PSUBSB256
,
26193 IX86_BUILTIN_PSUBSW256
,
26194 IX86_BUILTIN_PSUBUSB256
,
26195 IX86_BUILTIN_PSUBUSW256
,
26196 IX86_BUILTIN_PUNPCKHBW256
,
26197 IX86_BUILTIN_PUNPCKHWD256
,
26198 IX86_BUILTIN_PUNPCKHDQ256
,
26199 IX86_BUILTIN_PUNPCKHQDQ256
,
26200 IX86_BUILTIN_PUNPCKLBW256
,
26201 IX86_BUILTIN_PUNPCKLWD256
,
26202 IX86_BUILTIN_PUNPCKLDQ256
,
26203 IX86_BUILTIN_PUNPCKLQDQ256
,
26204 IX86_BUILTIN_PXOR256
,
26205 IX86_BUILTIN_MOVNTDQA256
,
26206 IX86_BUILTIN_VBROADCASTSS_PS
,
26207 IX86_BUILTIN_VBROADCASTSS_PS256
,
26208 IX86_BUILTIN_VBROADCASTSD_PD256
,
26209 IX86_BUILTIN_VBROADCASTSI256
,
26210 IX86_BUILTIN_PBLENDD256
,
26211 IX86_BUILTIN_PBLENDD128
,
26212 IX86_BUILTIN_PBROADCASTB256
,
26213 IX86_BUILTIN_PBROADCASTW256
,
26214 IX86_BUILTIN_PBROADCASTD256
,
26215 IX86_BUILTIN_PBROADCASTQ256
,
26216 IX86_BUILTIN_PBROADCASTB128
,
26217 IX86_BUILTIN_PBROADCASTW128
,
26218 IX86_BUILTIN_PBROADCASTD128
,
26219 IX86_BUILTIN_PBROADCASTQ128
,
26220 IX86_BUILTIN_VPERMVARSI256
,
26221 IX86_BUILTIN_VPERMDF256
,
26222 IX86_BUILTIN_VPERMVARSF256
,
26223 IX86_BUILTIN_VPERMDI256
,
26224 IX86_BUILTIN_VPERMTI256
,
26225 IX86_BUILTIN_VEXTRACT128I256
,
26226 IX86_BUILTIN_VINSERT128I256
,
26227 IX86_BUILTIN_MASKLOADD
,
26228 IX86_BUILTIN_MASKLOADQ
,
26229 IX86_BUILTIN_MASKLOADD256
,
26230 IX86_BUILTIN_MASKLOADQ256
,
26231 IX86_BUILTIN_MASKSTORED
,
26232 IX86_BUILTIN_MASKSTOREQ
,
26233 IX86_BUILTIN_MASKSTORED256
,
26234 IX86_BUILTIN_MASKSTOREQ256
,
26235 IX86_BUILTIN_PSLLVV4DI
,
26236 IX86_BUILTIN_PSLLVV2DI
,
26237 IX86_BUILTIN_PSLLVV8SI
,
26238 IX86_BUILTIN_PSLLVV4SI
,
26239 IX86_BUILTIN_PSRAVV8SI
,
26240 IX86_BUILTIN_PSRAVV4SI
,
26241 IX86_BUILTIN_PSRLVV4DI
,
26242 IX86_BUILTIN_PSRLVV2DI
,
26243 IX86_BUILTIN_PSRLVV8SI
,
26244 IX86_BUILTIN_PSRLVV4SI
,
26246 IX86_BUILTIN_GATHERSIV2DF
,
26247 IX86_BUILTIN_GATHERSIV4DF
,
26248 IX86_BUILTIN_GATHERDIV2DF
,
26249 IX86_BUILTIN_GATHERDIV4DF
,
26250 IX86_BUILTIN_GATHERSIV4SF
,
26251 IX86_BUILTIN_GATHERSIV8SF
,
26252 IX86_BUILTIN_GATHERDIV4SF
,
26253 IX86_BUILTIN_GATHERDIV8SF
,
26254 IX86_BUILTIN_GATHERSIV2DI
,
26255 IX86_BUILTIN_GATHERSIV4DI
,
26256 IX86_BUILTIN_GATHERDIV2DI
,
26257 IX86_BUILTIN_GATHERDIV4DI
,
26258 IX86_BUILTIN_GATHERSIV4SI
,
26259 IX86_BUILTIN_GATHERSIV8SI
,
26260 IX86_BUILTIN_GATHERDIV4SI
,
26261 IX86_BUILTIN_GATHERDIV8SI
,
26263 /* Alternate 4 element gather for the vectorizer where
26264 all operands are 32-byte wide. */
26265 IX86_BUILTIN_GATHERALTSIV4DF
,
26266 IX86_BUILTIN_GATHERALTDIV8SF
,
26267 IX86_BUILTIN_GATHERALTSIV4DI
,
26268 IX86_BUILTIN_GATHERALTDIV8SI
,
26270 /* TFmode support builtins. */
26272 IX86_BUILTIN_HUGE_VALQ
,
26273 IX86_BUILTIN_FABSQ
,
26274 IX86_BUILTIN_COPYSIGNQ
,
26276 /* Vectorizer support builtins. */
26277 IX86_BUILTIN_CPYSGNPS
,
26278 IX86_BUILTIN_CPYSGNPD
,
26279 IX86_BUILTIN_CPYSGNPS256
,
26280 IX86_BUILTIN_CPYSGNPD256
,
26282 /* FMA4 instructions. */
26283 IX86_BUILTIN_VFMADDSS
,
26284 IX86_BUILTIN_VFMADDSD
,
26285 IX86_BUILTIN_VFMADDPS
,
26286 IX86_BUILTIN_VFMADDPD
,
26287 IX86_BUILTIN_VFMADDPS256
,
26288 IX86_BUILTIN_VFMADDPD256
,
26289 IX86_BUILTIN_VFMADDSUBPS
,
26290 IX86_BUILTIN_VFMADDSUBPD
,
26291 IX86_BUILTIN_VFMADDSUBPS256
,
26292 IX86_BUILTIN_VFMADDSUBPD256
,
26294 /* FMA3 instructions. */
26295 IX86_BUILTIN_VFMADDSS3
,
26296 IX86_BUILTIN_VFMADDSD3
,
26298 /* XOP instructions. */
26299 IX86_BUILTIN_VPCMOV
,
26300 IX86_BUILTIN_VPCMOV_V2DI
,
26301 IX86_BUILTIN_VPCMOV_V4SI
,
26302 IX86_BUILTIN_VPCMOV_V8HI
,
26303 IX86_BUILTIN_VPCMOV_V16QI
,
26304 IX86_BUILTIN_VPCMOV_V4SF
,
26305 IX86_BUILTIN_VPCMOV_V2DF
,
26306 IX86_BUILTIN_VPCMOV256
,
26307 IX86_BUILTIN_VPCMOV_V4DI256
,
26308 IX86_BUILTIN_VPCMOV_V8SI256
,
26309 IX86_BUILTIN_VPCMOV_V16HI256
,
26310 IX86_BUILTIN_VPCMOV_V32QI256
,
26311 IX86_BUILTIN_VPCMOV_V8SF256
,
26312 IX86_BUILTIN_VPCMOV_V4DF256
,
26314 IX86_BUILTIN_VPPERM
,
26316 IX86_BUILTIN_VPMACSSWW
,
26317 IX86_BUILTIN_VPMACSWW
,
26318 IX86_BUILTIN_VPMACSSWD
,
26319 IX86_BUILTIN_VPMACSWD
,
26320 IX86_BUILTIN_VPMACSSDD
,
26321 IX86_BUILTIN_VPMACSDD
,
26322 IX86_BUILTIN_VPMACSSDQL
,
26323 IX86_BUILTIN_VPMACSSDQH
,
26324 IX86_BUILTIN_VPMACSDQL
,
26325 IX86_BUILTIN_VPMACSDQH
,
26326 IX86_BUILTIN_VPMADCSSWD
,
26327 IX86_BUILTIN_VPMADCSWD
,
26329 IX86_BUILTIN_VPHADDBW
,
26330 IX86_BUILTIN_VPHADDBD
,
26331 IX86_BUILTIN_VPHADDBQ
,
26332 IX86_BUILTIN_VPHADDWD
,
26333 IX86_BUILTIN_VPHADDWQ
,
26334 IX86_BUILTIN_VPHADDDQ
,
26335 IX86_BUILTIN_VPHADDUBW
,
26336 IX86_BUILTIN_VPHADDUBD
,
26337 IX86_BUILTIN_VPHADDUBQ
,
26338 IX86_BUILTIN_VPHADDUWD
,
26339 IX86_BUILTIN_VPHADDUWQ
,
26340 IX86_BUILTIN_VPHADDUDQ
,
26341 IX86_BUILTIN_VPHSUBBW
,
26342 IX86_BUILTIN_VPHSUBWD
,
26343 IX86_BUILTIN_VPHSUBDQ
,
26345 IX86_BUILTIN_VPROTB
,
26346 IX86_BUILTIN_VPROTW
,
26347 IX86_BUILTIN_VPROTD
,
26348 IX86_BUILTIN_VPROTQ
,
26349 IX86_BUILTIN_VPROTB_IMM
,
26350 IX86_BUILTIN_VPROTW_IMM
,
26351 IX86_BUILTIN_VPROTD_IMM
,
26352 IX86_BUILTIN_VPROTQ_IMM
,
26354 IX86_BUILTIN_VPSHLB
,
26355 IX86_BUILTIN_VPSHLW
,
26356 IX86_BUILTIN_VPSHLD
,
26357 IX86_BUILTIN_VPSHLQ
,
26358 IX86_BUILTIN_VPSHAB
,
26359 IX86_BUILTIN_VPSHAW
,
26360 IX86_BUILTIN_VPSHAD
,
26361 IX86_BUILTIN_VPSHAQ
,
26363 IX86_BUILTIN_VFRCZSS
,
26364 IX86_BUILTIN_VFRCZSD
,
26365 IX86_BUILTIN_VFRCZPS
,
26366 IX86_BUILTIN_VFRCZPD
,
26367 IX86_BUILTIN_VFRCZPS256
,
26368 IX86_BUILTIN_VFRCZPD256
,
26370 IX86_BUILTIN_VPCOMEQUB
,
26371 IX86_BUILTIN_VPCOMNEUB
,
26372 IX86_BUILTIN_VPCOMLTUB
,
26373 IX86_BUILTIN_VPCOMLEUB
,
26374 IX86_BUILTIN_VPCOMGTUB
,
26375 IX86_BUILTIN_VPCOMGEUB
,
26376 IX86_BUILTIN_VPCOMFALSEUB
,
26377 IX86_BUILTIN_VPCOMTRUEUB
,
26379 IX86_BUILTIN_VPCOMEQUW
,
26380 IX86_BUILTIN_VPCOMNEUW
,
26381 IX86_BUILTIN_VPCOMLTUW
,
26382 IX86_BUILTIN_VPCOMLEUW
,
26383 IX86_BUILTIN_VPCOMGTUW
,
26384 IX86_BUILTIN_VPCOMGEUW
,
26385 IX86_BUILTIN_VPCOMFALSEUW
,
26386 IX86_BUILTIN_VPCOMTRUEUW
,
26388 IX86_BUILTIN_VPCOMEQUD
,
26389 IX86_BUILTIN_VPCOMNEUD
,
26390 IX86_BUILTIN_VPCOMLTUD
,
26391 IX86_BUILTIN_VPCOMLEUD
,
26392 IX86_BUILTIN_VPCOMGTUD
,
26393 IX86_BUILTIN_VPCOMGEUD
,
26394 IX86_BUILTIN_VPCOMFALSEUD
,
26395 IX86_BUILTIN_VPCOMTRUEUD
,
26397 IX86_BUILTIN_VPCOMEQUQ
,
26398 IX86_BUILTIN_VPCOMNEUQ
,
26399 IX86_BUILTIN_VPCOMLTUQ
,
26400 IX86_BUILTIN_VPCOMLEUQ
,
26401 IX86_BUILTIN_VPCOMGTUQ
,
26402 IX86_BUILTIN_VPCOMGEUQ
,
26403 IX86_BUILTIN_VPCOMFALSEUQ
,
26404 IX86_BUILTIN_VPCOMTRUEUQ
,
26406 IX86_BUILTIN_VPCOMEQB
,
26407 IX86_BUILTIN_VPCOMNEB
,
26408 IX86_BUILTIN_VPCOMLTB
,
26409 IX86_BUILTIN_VPCOMLEB
,
26410 IX86_BUILTIN_VPCOMGTB
,
26411 IX86_BUILTIN_VPCOMGEB
,
26412 IX86_BUILTIN_VPCOMFALSEB
,
26413 IX86_BUILTIN_VPCOMTRUEB
,
26415 IX86_BUILTIN_VPCOMEQW
,
26416 IX86_BUILTIN_VPCOMNEW
,
26417 IX86_BUILTIN_VPCOMLTW
,
26418 IX86_BUILTIN_VPCOMLEW
,
26419 IX86_BUILTIN_VPCOMGTW
,
26420 IX86_BUILTIN_VPCOMGEW
,
26421 IX86_BUILTIN_VPCOMFALSEW
,
26422 IX86_BUILTIN_VPCOMTRUEW
,
26424 IX86_BUILTIN_VPCOMEQD
,
26425 IX86_BUILTIN_VPCOMNED
,
26426 IX86_BUILTIN_VPCOMLTD
,
26427 IX86_BUILTIN_VPCOMLED
,
26428 IX86_BUILTIN_VPCOMGTD
,
26429 IX86_BUILTIN_VPCOMGED
,
26430 IX86_BUILTIN_VPCOMFALSED
,
26431 IX86_BUILTIN_VPCOMTRUED
,
26433 IX86_BUILTIN_VPCOMEQQ
,
26434 IX86_BUILTIN_VPCOMNEQ
,
26435 IX86_BUILTIN_VPCOMLTQ
,
26436 IX86_BUILTIN_VPCOMLEQ
,
26437 IX86_BUILTIN_VPCOMGTQ
,
26438 IX86_BUILTIN_VPCOMGEQ
,
26439 IX86_BUILTIN_VPCOMFALSEQ
,
26440 IX86_BUILTIN_VPCOMTRUEQ
,
26442 /* LWP instructions. */
26443 IX86_BUILTIN_LLWPCB
,
26444 IX86_BUILTIN_SLWPCB
,
26445 IX86_BUILTIN_LWPVAL32
,
26446 IX86_BUILTIN_LWPVAL64
,
26447 IX86_BUILTIN_LWPINS32
,
26448 IX86_BUILTIN_LWPINS64
,
26453 IX86_BUILTIN_XBEGIN
,
26455 IX86_BUILTIN_XABORT
,
26456 IX86_BUILTIN_XTEST
,
26458 /* BMI instructions. */
26459 IX86_BUILTIN_BEXTR32
,
26460 IX86_BUILTIN_BEXTR64
,
26463 /* TBM instructions. */
26464 IX86_BUILTIN_BEXTRI32
,
26465 IX86_BUILTIN_BEXTRI64
,
26467 /* BMI2 instructions. */
26468 IX86_BUILTIN_BZHI32
,
26469 IX86_BUILTIN_BZHI64
,
26470 IX86_BUILTIN_PDEP32
,
26471 IX86_BUILTIN_PDEP64
,
26472 IX86_BUILTIN_PEXT32
,
26473 IX86_BUILTIN_PEXT64
,
26475 /* ADX instructions. */
26476 IX86_BUILTIN_ADDCARRYX32
,
26477 IX86_BUILTIN_ADDCARRYX64
,
26479 /* FSGSBASE instructions. */
26480 IX86_BUILTIN_RDFSBASE32
,
26481 IX86_BUILTIN_RDFSBASE64
,
26482 IX86_BUILTIN_RDGSBASE32
,
26483 IX86_BUILTIN_RDGSBASE64
,
26484 IX86_BUILTIN_WRFSBASE32
,
26485 IX86_BUILTIN_WRFSBASE64
,
26486 IX86_BUILTIN_WRGSBASE32
,
26487 IX86_BUILTIN_WRGSBASE64
,
26489 /* RDRND instructions. */
26490 IX86_BUILTIN_RDRAND16_STEP
,
26491 IX86_BUILTIN_RDRAND32_STEP
,
26492 IX86_BUILTIN_RDRAND64_STEP
,
26494 /* RDSEED instructions. */
26495 IX86_BUILTIN_RDSEED16_STEP
,
26496 IX86_BUILTIN_RDSEED32_STEP
,
26497 IX86_BUILTIN_RDSEED64_STEP
,
26499 /* F16C instructions. */
26500 IX86_BUILTIN_CVTPH2PS
,
26501 IX86_BUILTIN_CVTPH2PS256
,
26502 IX86_BUILTIN_CVTPS2PH
,
26503 IX86_BUILTIN_CVTPS2PH256
,
26505 /* CFString built-in for darwin */
26506 IX86_BUILTIN_CFSTRING
,
26508 /* Builtins to get CPU type and supported features. */
26509 IX86_BUILTIN_CPU_INIT
,
26510 IX86_BUILTIN_CPU_IS
,
26511 IX86_BUILTIN_CPU_SUPPORTS
,
26516 /* Table for the ix86 builtin decls. */
26517 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26519 /* Table of all of the builtin functions that are possible with different ISA's
26520 but are waiting to be built until a function is declared to use that
26522 struct builtin_isa
{
26523 const char *name
; /* function name */
26524 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26525 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26526 bool const_p
; /* true if the declaration is constant */
26527 bool set_and_not_built_p
;
26530 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26533 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26534 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26535 function decl in the ix86_builtins array. Returns the function decl or
26536 NULL_TREE, if the builtin was not added.
26538 If the front end has a special hook for builtin functions, delay adding
26539 builtin functions that aren't in the current ISA until the ISA is changed
26540 with function specific optimization. Doing so, can save about 300K for the
26541 default compiler. When the builtin is expanded, check at that time whether
26544 If the front end doesn't have a special hook, record all builtins, even if
26545 it isn't an instruction set in the current ISA in case the user uses
26546 function specific options for a different ISA, so that we don't get scope
26547 errors if a builtin is added in the middle of a function scope. */
26550 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26551 enum ix86_builtin_func_type tcode
,
26552 enum ix86_builtins code
)
26554 tree decl
= NULL_TREE
;
26556 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26558 ix86_builtins_isa
[(int) code
].isa
= mask
;
26560 mask
&= ~OPTION_MASK_ISA_64BIT
;
26562 || (mask
& ix86_isa_flags
) != 0
26563 || (lang_hooks
.builtin_function
26564 == lang_hooks
.builtin_function_ext_scope
))
26567 tree type
= ix86_get_builtin_func_type (tcode
);
26568 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26570 ix86_builtins
[(int) code
] = decl
;
26571 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26575 ix86_builtins
[(int) code
] = NULL_TREE
;
26576 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26577 ix86_builtins_isa
[(int) code
].name
= name
;
26578 ix86_builtins_isa
[(int) code
].const_p
= false;
26579 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26586 /* Like def_builtin, but also marks the function decl "const". */
26589 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26590 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26592 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26594 TREE_READONLY (decl
) = 1;
26596 ix86_builtins_isa
[(int) code
].const_p
= true;
26601 /* Add any new builtin functions for a given ISA that may not have been
26602 declared. This saves a bit of space compared to adding all of the
26603 declarations to the tree, even if we didn't use them. */
26606 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26610 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26612 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26613 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26617 /* Don't define the builtin again. */
26618 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26620 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26621 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26622 type
, i
, BUILT_IN_MD
, NULL
,
26625 ix86_builtins
[i
] = decl
;
26626 if (ix86_builtins_isa
[i
].const_p
)
26627 TREE_READONLY (decl
) = 1;
26632 /* Bits for builtin_description.flag. */
26634 /* Set when we don't support the comparison natively, and should
26635 swap_comparison in order to support it. */
26636 #define BUILTIN_DESC_SWAP_OPERANDS 1
26638 struct builtin_description
26640 const HOST_WIDE_INT mask
;
26641 const enum insn_code icode
;
26642 const char *const name
;
26643 const enum ix86_builtins code
;
26644 const enum rtx_code comparison
;
26648 static const struct builtin_description bdesc_comi
[] =
26650 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26651 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26652 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26653 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26654 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26655 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26656 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26657 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26658 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26659 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26660 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26661 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26662 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26663 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26664 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26665 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26666 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26667 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26668 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26669 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26670 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26671 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26672 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26673 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26676 static const struct builtin_description bdesc_pcmpestr
[] =
26679 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26680 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26681 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26682 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26683 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26684 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26685 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26688 static const struct builtin_description bdesc_pcmpistr
[] =
26691 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26692 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26693 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26694 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26695 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26696 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26697 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26700 /* Special builtins with variable number of arguments. */
26701 static const struct builtin_description bdesc_special_args
[] =
26703 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26704 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26705 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26708 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26711 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26713 /* FXSR, XSAVE and XSAVEOPT */
26714 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26715 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26716 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26717 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26718 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26720 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26721 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26722 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26723 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26724 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26727 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26728 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26729 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26731 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26732 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26733 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26734 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26736 /* SSE or 3DNow!A */
26737 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26738 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26741 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26744 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26746 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26748 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26749 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26756 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26759 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26762 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26763 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26766 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26767 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26769 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26770 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26771 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26772 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26773 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26775 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26776 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26777 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26778 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26779 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26780 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26781 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26783 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26784 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26785 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26787 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26788 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26789 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26790 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26791 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26792 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26793 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26794 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26797 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26798 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26799 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26800 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26801 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26802 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26803 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26804 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26805 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26807 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26808 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26809 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26810 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26811 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26812 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26815 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26816 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26817 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26818 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26819 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26820 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26821 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26822 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26825 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26826 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26827 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26830 /* Builtins with variable number of arguments. */
26831 static const struct builtin_description bdesc_args
[] =
26833 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26834 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26835 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26836 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26837 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26838 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26839 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26842 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26843 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26844 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26845 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26846 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26847 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26849 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26850 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26851 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26852 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26853 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26854 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26855 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26856 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26858 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26859 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26861 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26862 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26863 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26864 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26866 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26867 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26868 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26869 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26870 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26871 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26873 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26874 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26875 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26876 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26877 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26878 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26880 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26881 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26882 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26884 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26886 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26887 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26888 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26889 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26890 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26891 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26893 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26894 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26895 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26896 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26897 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26898 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26900 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26901 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26902 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26903 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26906 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26907 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26908 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26909 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26911 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26912 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26913 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26914 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26915 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26916 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26917 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26918 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26919 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26920 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26921 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26922 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26923 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26924 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26925 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26928 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26929 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26930 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26931 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26932 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26933 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26936 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26937 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26938 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26939 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26940 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26941 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26942 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26943 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26944 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26945 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26946 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26947 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26949 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26951 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26952 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26953 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26954 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26955 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26956 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26957 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26958 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26960 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26961 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26962 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26963 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26964 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26965 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26966 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26967 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26968 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26969 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26970 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26971 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26972 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26973 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26974 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26975 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26976 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26977 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26978 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26979 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26980 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26981 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26983 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26984 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26985 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26986 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26988 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26989 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26990 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26991 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26993 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26995 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26996 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26997 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26998 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26999 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27001 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27002 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27003 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27005 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27007 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27008 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27009 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27011 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27012 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27014 /* SSE MMX or 3Dnow!A */
27015 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27016 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27017 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27019 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27020 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27021 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27022 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27024 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27025 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27027 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27030 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27032 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27033 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27035 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27038 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27039 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27040 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27042 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27044 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27046 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27047 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27048 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27049 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27051 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27052 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27053 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27055 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27056 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27057 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27058 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27059 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27060 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27061 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27062 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27064 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27065 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27066 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27067 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27068 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27069 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27070 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27072 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27073 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27075 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27077 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27079 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27080 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27081 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27082 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27083 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27085 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27086 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27087 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27088 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27090 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27091 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27092 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27093 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27095 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27097 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27098 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27099 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27101 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27103 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27104 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27105 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27106 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27107 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27108 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27109 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27110 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27112 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27113 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27114 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27115 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27116 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27117 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27118 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27119 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27121 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27122 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27124 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27125 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27126 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27127 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27129 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27130 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27132 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27133 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27134 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27135 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27136 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27137 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27139 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27140 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27141 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27144 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27145 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27146 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27147 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27148 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27149 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27150 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27151 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27153 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27154 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27155 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27157 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27158 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27160 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27161 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27166 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27167 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27168 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27170 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27171 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27172 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27173 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27174 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27175 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27176 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27178 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27179 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27180 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27181 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27182 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27183 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27184 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27186 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27187 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27189 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27197 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27201 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27204 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27205 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27207 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27208 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27209 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27210 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27211 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27212 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27216 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27217 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27218 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27219 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27220 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27222 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27223 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27224 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27225 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27226 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27227 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27228 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27229 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27230 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27231 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27232 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27233 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27234 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27235 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27236 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27237 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27238 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27239 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27240 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27241 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27242 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27243 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27244 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27245 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27248 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27249 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27252 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27253 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27254 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27255 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27256 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27257 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27258 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27259 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27260 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27261 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27263 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27264 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27265 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27266 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27267 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27268 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27269 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27270 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27271 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27272 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27273 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27274 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27275 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27277 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27278 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27279 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27280 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27281 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27282 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27283 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27284 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27285 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27286 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27287 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27288 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27291 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27292 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27293 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27294 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27296 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27297 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27298 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27299 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27301 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27302 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27304 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27305 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27307 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27308 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27309 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27310 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27312 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27313 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27315 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27316 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27318 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27319 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27320 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27323 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27324 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27325 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27326 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27327 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27330 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27331 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27332 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27333 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27337 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27339 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27340 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27341 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27342 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27345 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27348 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27349 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27350 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27351 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27352 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27353 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27354 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27355 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27356 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27357 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27358 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27359 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27360 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27361 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27362 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27363 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27364 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27365 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27366 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27367 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27368 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27369 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27370 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27371 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27372 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27373 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27375 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27376 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27377 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27378 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27380 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27381 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27382 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27383 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27384 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27385 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27386 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27387 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27388 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27389 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27390 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27391 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27392 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27393 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27394 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27395 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27396 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27397 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27398 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27399 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27400 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27401 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27402 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27403 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27404 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27405 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27406 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27407 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27408 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27409 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27410 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27411 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27412 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27413 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27415 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27416 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27417 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27419 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27420 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27421 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27422 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27423 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27425 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27427 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27428 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27430 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27431 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27432 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27433 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27435 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27436 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27438 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27439 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27441 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27442 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27443 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27444 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27446 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27447 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27449 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27450 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27452 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27453 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27454 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27455 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27457 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27458 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27459 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27460 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27461 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27462 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27464 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27465 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27466 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27467 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27468 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27469 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27470 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27471 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27472 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27473 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27474 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27475 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27476 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27477 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27478 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27480 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27481 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27483 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27484 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27486 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27489 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27490 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27491 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27492 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27493 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27494 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27495 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27496 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27497 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27498 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27499 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27500 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27501 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27502 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27503 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27504 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27505 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27506 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27507 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27508 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27509 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27510 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27511 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27512 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27513 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27514 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27515 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27516 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27517 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27518 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27519 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27520 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27521 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27522 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27523 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27524 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27525 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27526 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27527 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27528 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27529 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27530 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27531 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27532 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27533 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27534 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27535 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27536 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27537 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27538 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27539 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27540 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27541 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27542 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27543 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27544 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27545 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27546 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27547 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27548 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27549 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27550 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27551 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27552 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27553 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27554 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27555 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27556 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27557 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27558 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27559 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27560 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27561 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27562 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27563 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27564 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27565 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27566 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27567 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27568 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27569 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27570 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27571 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27572 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27573 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27574 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27575 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27576 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27577 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27578 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27579 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27580 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27581 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27582 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27583 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27584 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27585 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27586 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27587 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27588 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27589 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27590 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27591 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27592 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27593 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27594 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27595 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27596 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27597 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27598 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27599 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27600 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27601 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27602 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27603 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27604 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27605 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27606 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27607 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27608 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27609 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27610 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27611 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27612 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27613 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27614 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27615 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27616 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27617 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27618 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27619 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27620 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27621 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27622 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27623 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27624 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27625 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27626 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27627 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27628 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27629 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27630 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27631 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27632 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27633 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27634 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27636 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27639 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27640 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27641 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27644 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27645 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27648 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27649 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27650 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27651 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27654 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27655 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27656 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27657 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27658 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27659 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Shorthand names for the ix86_builtin_func_type of each multi-arg
   builtin: MULTI_ARG_<nargs>_<element kind>[variant].  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27716 static const struct builtin_description bdesc_multi_arg
[] =
27718 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27719 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27720 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27721 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27722 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27723 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27725 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27726 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27727 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27728 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27729 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27730 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27732 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27733 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27734 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27735 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27736 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27737 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27738 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27739 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27740 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27741 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27742 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27743 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27745 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27746 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27747 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27748 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27749 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27750 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27751 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27752 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27753 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27754 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27755 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27756 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27758 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27759 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27760 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27761 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27762 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27763 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27764 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27766 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27767 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27768 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27769 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27770 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27771 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27772 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27774 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27776 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27777 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27778 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27779 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27780 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27781 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27782 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27783 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27784 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27785 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27786 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27787 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27789 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27790 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27791 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27792 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27793 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27794 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27795 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27796 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27797 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27798 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27799 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27800 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27801 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27802 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27803 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27804 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27806 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27807 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27808 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27809 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27810 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27811 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27813 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27814 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27815 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27816 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27817 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27818 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27819 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27820 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27821 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27822 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27823 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27824 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27825 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27826 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27827 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27829 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27830 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27831 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27832 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27833 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27834 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27835 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27837 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27838 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27839 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27840 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27841 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27842 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27843 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27845 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27846 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27847 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27848 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27849 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27850 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27851 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27853 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27854 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27855 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27856 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27857 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27858 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27859 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27861 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27862 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27863 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27864 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27865 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27866 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27867 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27869 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27870 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27871 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27872 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27873 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27874 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27875 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27877 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27878 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27879 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27880 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27881 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27882 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27883 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27885 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27886 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27887 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27888 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
27889 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
27890 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
27891 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
27893 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27894 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27895 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27896 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27897 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27898 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27899 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27900 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27902 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27903 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27904 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27905 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27906 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27907 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27908 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27909 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27911 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
27912 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
27913 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
27914 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
27918 /* TM vector builtins. */
27920 /* Reuse the existing x86-specific `struct builtin_description' cause
27921 we're lazy. Add casts to make them fit. */
27922 static const struct builtin_description bdesc_tm
[] =
27924 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27925 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27926 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27927 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27928 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27929 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27930 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27932 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27933 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27934 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27935 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27936 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27937 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27938 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27940 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27941 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27942 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27943 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27944 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27945 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27946 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27948 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27949 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27950 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27953 /* TM callbacks. */
27955 /* Return the builtin decl needed to load a vector of TYPE. */
27958 ix86_builtin_tm_load (tree type
)
27960 if (TREE_CODE (type
) == VECTOR_TYPE
)
27962 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27965 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
27967 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
27969 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
27975 /* Return the builtin decl needed to store a vector of TYPE. */
27978 ix86_builtin_tm_store (tree type
)
27980 if (TREE_CODE (type
) == VECTOR_TYPE
)
27982 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27985 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
27987 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
27989 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
27995 /* Initialize the transactional memory vector load/store builtins. */
27998 ix86_init_tm_builtins (void)
28000 enum ix86_builtin_func_type ftype
;
28001 const struct builtin_description
*d
;
28004 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28005 tree attrs_log
, attrs_type_log
;
28010 /* If there are no builtins defined, we must be compiling in a
28011 language without trans-mem support. */
28012 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28015 /* Use whatever attributes a normal TM load has. */
28016 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28017 attrs_load
= DECL_ATTRIBUTES (decl
);
28018 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28019 /* Use whatever attributes a normal TM store has. */
28020 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28021 attrs_store
= DECL_ATTRIBUTES (decl
);
28022 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28023 /* Use whatever attributes a normal TM log has. */
28024 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28025 attrs_log
= DECL_ATTRIBUTES (decl
);
28026 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28028 for (i
= 0, d
= bdesc_tm
;
28029 i
< ARRAY_SIZE (bdesc_tm
);
28032 if ((d
->mask
& ix86_isa_flags
) != 0
28033 || (lang_hooks
.builtin_function
28034 == lang_hooks
.builtin_function_ext_scope
))
28036 tree type
, attrs
, attrs_type
;
28037 enum built_in_function code
= (enum built_in_function
) d
->code
;
28039 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28040 type
= ix86_get_builtin_func_type (ftype
);
28042 if (BUILTIN_TM_LOAD_P (code
))
28044 attrs
= attrs_load
;
28045 attrs_type
= attrs_type_load
;
28047 else if (BUILTIN_TM_STORE_P (code
))
28049 attrs
= attrs_store
;
28050 attrs_type
= attrs_type_store
;
28055 attrs_type
= attrs_type_log
;
28057 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28058 /* The builtin without the prefix for
28059 calling it directly. */
28060 d
->name
+ strlen ("__builtin_"),
28062 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28063 set the TYPE_ATTRIBUTES. */
28064 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28066 set_builtin_decl (code
, decl
, false);
28071 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28072 in the current target ISA to allow the user to compile particular modules
28073 with different target specific options that differ from the command line
28076 ix86_init_mmx_sse_builtins (void)
28078 const struct builtin_description
* d
;
28079 enum ix86_builtin_func_type ftype
;
28082 /* Add all special builtins with variable number of operands. */
28083 for (i
= 0, d
= bdesc_special_args
;
28084 i
< ARRAY_SIZE (bdesc_special_args
);
28090 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28091 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28094 /* Add all builtins with variable number of operands. */
28095 for (i
= 0, d
= bdesc_args
;
28096 i
< ARRAY_SIZE (bdesc_args
);
28102 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28103 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28106 /* pcmpestr[im] insns. */
28107 for (i
= 0, d
= bdesc_pcmpestr
;
28108 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28111 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28112 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28114 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28115 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28118 /* pcmpistr[im] insns. */
28119 for (i
= 0, d
= bdesc_pcmpistr
;
28120 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28123 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28124 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28126 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28127 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28130 /* comi/ucomi insns. */
28131 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28133 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28134 ftype
= INT_FTYPE_V2DF_V2DF
;
28136 ftype
= INT_FTYPE_V4SF_V4SF
;
28137 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28141 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28142 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28143 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28144 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28146 /* SSE or 3DNow!A */
28147 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28148 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28149 IX86_BUILTIN_MASKMOVQ
);
28152 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28153 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28155 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28156 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28157 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28158 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28161 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28162 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28163 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28164 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28167 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28168 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28169 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28170 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28171 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28172 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28173 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28174 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28175 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28176 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28177 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28178 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28181 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28182 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28185 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28186 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28187 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28188 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28189 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28190 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28191 IX86_BUILTIN_RDRAND64_STEP
);
28194 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28195 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28196 IX86_BUILTIN_GATHERSIV2DF
);
28198 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28199 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28200 IX86_BUILTIN_GATHERSIV4DF
);
28202 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28203 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28204 IX86_BUILTIN_GATHERDIV2DF
);
28206 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28207 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28208 IX86_BUILTIN_GATHERDIV4DF
);
28210 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28211 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28212 IX86_BUILTIN_GATHERSIV4SF
);
28214 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28215 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28216 IX86_BUILTIN_GATHERSIV8SF
);
28218 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28219 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28220 IX86_BUILTIN_GATHERDIV4SF
);
28222 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28223 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28224 IX86_BUILTIN_GATHERDIV8SF
);
28226 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28227 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28228 IX86_BUILTIN_GATHERSIV2DI
);
28230 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28231 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28232 IX86_BUILTIN_GATHERSIV4DI
);
28234 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28235 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28236 IX86_BUILTIN_GATHERDIV2DI
);
28238 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28239 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28240 IX86_BUILTIN_GATHERDIV4DI
);
28242 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28243 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28244 IX86_BUILTIN_GATHERSIV4SI
);
28246 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28247 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28248 IX86_BUILTIN_GATHERSIV8SI
);
28250 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28251 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28252 IX86_BUILTIN_GATHERDIV4SI
);
28254 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28255 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28256 IX86_BUILTIN_GATHERDIV8SI
);
28258 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28259 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28260 IX86_BUILTIN_GATHERALTSIV4DF
);
28262 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28263 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28264 IX86_BUILTIN_GATHERALTDIV8SF
);
28266 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28267 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28268 IX86_BUILTIN_GATHERALTSIV4DI
);
28270 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28271 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28272 IX86_BUILTIN_GATHERALTDIV8SI
);
28275 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28276 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28278 /* MMX access to the vec_init patterns. */
28279 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28280 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28282 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28283 V4HI_FTYPE_HI_HI_HI_HI
,
28284 IX86_BUILTIN_VEC_INIT_V4HI
);
28286 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28287 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28288 IX86_BUILTIN_VEC_INIT_V8QI
);
28290 /* Access to the vec_extract patterns. */
28291 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28292 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28293 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28294 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28295 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28296 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28297 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28298 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28299 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28300 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28302 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28303 "__builtin_ia32_vec_ext_v4hi",
28304 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28306 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28307 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28309 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28310 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28312 /* Access to the vec_set patterns. */
28313 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28314 "__builtin_ia32_vec_set_v2di",
28315 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28317 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28318 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28320 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28321 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28323 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28324 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28326 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28327 "__builtin_ia32_vec_set_v4hi",
28328 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28330 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28331 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28334 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28335 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28336 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28337 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28338 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28339 "__builtin_ia32_rdseed_di_step",
28340 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28343 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28344 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28345 def_builtin (OPTION_MASK_ISA_64BIT
,
28346 "__builtin_ia32_addcarryx_u64",
28347 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28348 IX86_BUILTIN_ADDCARRYX64
);
28350 /* Add FMA4 multi-arg argument instructions */
28351 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28356 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28357 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28361 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28362 to return a pointer to VERSION_DECL if the outcome of the expression
28363 formed by PREDICATE_CHAIN is true. This function will be called during
28364 version dispatch to decide which function version to execute. It returns
28365 the basic block at the end, to which more conditions can be added. */
28368 add_condition_to_bb (tree function_decl
, tree version_decl
,
28369 tree predicate_chain
, basic_block new_bb
)
28371 gimple return_stmt
;
28372 tree convert_expr
, result_var
;
28373 gimple convert_stmt
;
28374 gimple call_cond_stmt
;
28375 gimple if_else_stmt
;
28377 basic_block bb1
, bb2
, bb3
;
28380 tree cond_var
, and_expr_var
= NULL_TREE
;
28383 tree predicate_decl
, predicate_arg
;
28385 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
28387 gcc_assert (new_bb
!= NULL
);
28388 gseq
= bb_seq (new_bb
);
28391 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
28392 build_fold_addr_expr (version_decl
));
28393 result_var
= create_tmp_var (ptr_type_node
, NULL
);
28394 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
28395 return_stmt
= gimple_build_return (result_var
);
28397 if (predicate_chain
== NULL_TREE
)
28399 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28400 gimple_seq_add_stmt (&gseq
, return_stmt
);
28401 set_bb_seq (new_bb
, gseq
);
28402 gimple_set_bb (convert_stmt
, new_bb
);
28403 gimple_set_bb (return_stmt
, new_bb
);
28408 while (predicate_chain
!= NULL
)
28410 cond_var
= create_tmp_var (integer_type_node
, NULL
);
28411 predicate_decl
= TREE_PURPOSE (predicate_chain
);
28412 predicate_arg
= TREE_VALUE (predicate_chain
);
28413 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
28414 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
28416 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
28417 gimple_set_bb (call_cond_stmt
, new_bb
);
28418 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
28420 predicate_chain
= TREE_CHAIN (predicate_chain
);
28422 if (and_expr_var
== NULL
)
28423 and_expr_var
= cond_var
;
28426 gimple assign_stmt
;
28427 /* Use MIN_EXPR to check if any integer is zero?.
28428 and_expr_var = min_expr <cond_var, and_expr_var> */
28429 assign_stmt
= gimple_build_assign (and_expr_var
,
28430 build2 (MIN_EXPR
, integer_type_node
,
28431 cond_var
, and_expr_var
));
28433 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
28434 gimple_set_bb (assign_stmt
, new_bb
);
28435 gimple_seq_add_stmt (&gseq
, assign_stmt
);
28439 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
28441 NULL_TREE
, NULL_TREE
);
28442 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
28443 gimple_set_bb (if_else_stmt
, new_bb
);
28444 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
28446 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28447 gimple_seq_add_stmt (&gseq
, return_stmt
);
28448 set_bb_seq (new_bb
, gseq
);
28451 e12
= split_block (bb1
, if_else_stmt
);
28453 e12
->flags
&= ~EDGE_FALLTHRU
;
28454 e12
->flags
|= EDGE_TRUE_VALUE
;
28456 e23
= split_block (bb2
, return_stmt
);
28458 gimple_set_bb (convert_stmt
, bb2
);
28459 gimple_set_bb (return_stmt
, bb2
);
28462 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
28465 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
28472 /* This parses the attribute arguments to target in DECL and determines
28473 the right builtin to use to match the platform specification.
28474 It returns the priority value for this version decl. If PREDICATE_LIST
28475 is not NULL, it stores the list of cpu features that need to be checked
28476 before dispatching this function. */
28478 static unsigned int
28479 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
28482 struct cl_target_option cur_target
;
28484 struct cl_target_option
*new_target
;
28485 const char *arg_str
= NULL
;
28486 const char *attrs_str
= NULL
;
28487 char *tok_str
= NULL
;
28490 /* Priority of i386 features, greater value is higher priority. This is
28491 used to decide the order in which function dispatch must happen. For
28492 instance, a version specialized for SSE4.2 should be checked for dispatch
28493 before a version for SSE3, as SSE4.2 implies SSE3. */
28494 enum feature_priority
28515 enum feature_priority priority
= P_ZERO
;
28517 /* These are the target attribute strings for which a dispatcher is
28518 available, from fold_builtin_cpu. */
28520 static struct _feature_list
28522 const char *const name
;
28523 const enum feature_priority priority
;
28525 const feature_list
[] =
28531 {"ssse3", P_SSSE3
},
28532 {"sse4.1", P_SSE4_1
},
28533 {"sse4.2", P_SSE4_2
},
28534 {"popcnt", P_POPCNT
},
28540 static unsigned int NUM_FEATURES
28541 = sizeof (feature_list
) / sizeof (struct _feature_list
);
28545 tree predicate_chain
= NULL_TREE
;
28546 tree predicate_decl
, predicate_arg
;
28548 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28549 gcc_assert (attrs
!= NULL
);
28551 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
28553 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
28554 attrs_str
= TREE_STRING_POINTER (attrs
);
28557 /* Handle arch= if specified. For priority, set it to be 1 more than
28558 the best instruction set the processor can handle. For instance, if
28559 there is a version for atom and a version for ssse3 (the highest ISA
28560 priority for atom), the atom version must be checked for dispatch
28561 before the ssse3 version. */
28562 if (strstr (attrs_str
, "arch=") != NULL
)
28564 cl_target_option_save (&cur_target
, &global_options
);
28565 target_node
= ix86_valid_target_attribute_tree (attrs
);
28567 gcc_assert (target_node
);
28568 new_target
= TREE_TARGET_OPTION (target_node
);
28569 gcc_assert (new_target
);
28571 if (new_target
->arch_specified
&& new_target
->arch
> 0)
28573 switch (new_target
->arch
)
28575 case PROCESSOR_CORE2_32
:
28576 case PROCESSOR_CORE2_64
:
28578 priority
= P_PROC_SSSE3
;
28580 case PROCESSOR_COREI7_32
:
28581 case PROCESSOR_COREI7_64
:
28582 arg_str
= "corei7";
28583 priority
= P_PROC_SSE4_2
;
28585 case PROCESSOR_ATOM
:
28587 priority
= P_PROC_SSSE3
;
28589 case PROCESSOR_AMDFAM10
:
28590 arg_str
= "amdfam10h";
28591 priority
= P_PROC_SSE4_a
;
28593 case PROCESSOR_BDVER1
:
28594 arg_str
= "bdver1";
28595 priority
= P_PROC_FMA
;
28597 case PROCESSOR_BDVER2
:
28598 arg_str
= "bdver2";
28599 priority
= P_PROC_FMA
;
28604 cl_target_option_restore (&global_options
, &cur_target
);
28606 if (predicate_list
&& arg_str
== NULL
)
28608 error_at (DECL_SOURCE_LOCATION (decl
),
28609 "No dispatcher found for the versioning attributes");
28613 if (predicate_list
)
28615 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
28616 /* For a C string literal the length includes the trailing NULL. */
28617 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
28618 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28623 /* Process feature name. */
28624 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
28625 strcpy (tok_str
, attrs_str
);
28626 token
= strtok (tok_str
, ",");
28627 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
28629 while (token
!= NULL
)
28631 /* Do not process "arch=" */
28632 if (strncmp (token
, "arch=", 5) == 0)
28634 token
= strtok (NULL
, ",");
28637 for (i
= 0; i
< NUM_FEATURES
; ++i
)
28639 if (strcmp (token
, feature_list
[i
].name
) == 0)
28641 if (predicate_list
)
28643 predicate_arg
= build_string_literal (
28644 strlen (feature_list
[i
].name
) + 1,
28645 feature_list
[i
].name
);
28646 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28649 /* Find the maximum priority feature. */
28650 if (feature_list
[i
].priority
> priority
)
28651 priority
= feature_list
[i
].priority
;
28656 if (predicate_list
&& i
== NUM_FEATURES
)
28658 error_at (DECL_SOURCE_LOCATION (decl
),
28659 "No dispatcher found for %s", token
);
28662 token
= strtok (NULL
, ",");
28666 if (predicate_list
&& predicate_chain
== NULL_TREE
)
28668 error_at (DECL_SOURCE_LOCATION (decl
),
28669 "No dispatcher found for the versioning attributes : %s",
28673 else if (predicate_list
)
28675 predicate_chain
= nreverse (predicate_chain
);
28676 *predicate_list
= predicate_chain
;
28682 /* This compares the priority of target features in function DECL1
28683 and DECL2. It returns positive value if DECL1 is higher priority,
28684 negative value if DECL2 is higher priority and 0 if they are the
28688 ix86_compare_version_priority (tree decl1
, tree decl2
)
28690 unsigned int priority1
= 0;
28691 unsigned int priority2
= 0;
28693 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1
)) != NULL
)
28694 priority1
= get_builtin_code_for_version (decl1
, NULL
);
28696 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2
)) != NULL
)
28697 priority2
= get_builtin_code_for_version (decl2
, NULL
);
28699 return (int)priority1
- (int)priority2
;
28702 /* V1 and V2 point to function versions with different priorities
28703 based on the target ISA. This function compares their priorities. */
28706 feature_compare (const void *v1
, const void *v2
)
28708 typedef struct _function_version_info
28711 tree predicate_chain
;
28712 unsigned int dispatch_priority
;
28713 } function_version_info
;
28715 const function_version_info c1
= *(const function_version_info
*)v1
;
28716 const function_version_info c2
= *(const function_version_info
*)v2
;
28717 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
28720 /* This function generates the dispatch function for
28721 multi-versioned functions. DISPATCH_DECL is the function which will
28722 contain the dispatch logic. FNDECLS are the function choices for
28723 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
28724 in DISPATCH_DECL in which the dispatch code is generated. */
28727 dispatch_function_versions (tree dispatch_decl
,
28729 basic_block
*empty_bb
)
28732 gimple ifunc_cpu_init_stmt
;
28736 vec
<tree
> *fndecls
;
28737 unsigned int num_versions
= 0;
28738 unsigned int actual_versions
= 0;
28741 struct _function_version_info
28744 tree predicate_chain
;
28745 unsigned int dispatch_priority
;
28746 }*function_version_info
;
28748 gcc_assert (dispatch_decl
!= NULL
28749 && fndecls_p
!= NULL
28750 && empty_bb
!= NULL
);
28752 /*fndecls_p is actually a vector. */
28753 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
28755 /* At least one more version other than the default. */
28756 num_versions
= fndecls
->length ();
28757 gcc_assert (num_versions
>= 2);
28759 function_version_info
= (struct _function_version_info
*)
28760 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
28762 /* The first version in the vector is the default decl. */
28763 default_decl
= (*fndecls
)[0];
28765 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
28767 gseq
= bb_seq (*empty_bb
);
28768 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
28769 constructors, so explicity call __builtin_cpu_init here. */
28770 ifunc_cpu_init_stmt
= gimple_build_call_vec (
28771 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vec
<tree
>());
28772 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
28773 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
28774 set_bb_seq (*empty_bb
, gseq
);
28779 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
28781 tree version_decl
= ele
;
28782 tree predicate_chain
= NULL_TREE
;
28783 unsigned int priority
;
28784 /* Get attribute string, parse it and find the right predicate decl.
28785 The predicate function could be a lengthy combination of many
28786 features, like arch-type and various isa-variants. */
28787 priority
= get_builtin_code_for_version (version_decl
,
28790 if (predicate_chain
== NULL_TREE
)
28794 function_version_info
[ix
- 1].version_decl
= version_decl
;
28795 function_version_info
[ix
- 1].predicate_chain
= predicate_chain
;
28796 function_version_info
[ix
- 1].dispatch_priority
= priority
;
28799 /* Sort the versions according to descending order of dispatch priority. The
28800 priority is based on the ISA. This is not a perfect solution. There
28801 could still be ambiguity. If more than one function version is suitable
28802 to execute, which one should be dispatched? In future, allow the user
28803 to specify a dispatch priority next to the version. */
28804 qsort (function_version_info
, actual_versions
,
28805 sizeof (struct _function_version_info
), feature_compare
);
28807 for (i
= 0; i
< actual_versions
; ++i
)
28808 *empty_bb
= add_condition_to_bb (dispatch_decl
,
28809 function_version_info
[i
].version_decl
,
28810 function_version_info
[i
].predicate_chain
,
28813 /* dispatch default version at the end. */
28814 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
28817 free (function_version_info
);
28821 /* This function returns true if FN1 and FN2 are versions of the same function,
28822 that is, the targets of the function decls are different. This assumes
28823 that FN1 and FN2 have the same signature. */
28826 ix86_function_versions (tree fn1
, tree fn2
)
28829 struct cl_target_option
*target1
, *target2
;
28831 if (TREE_CODE (fn1
) != FUNCTION_DECL
28832 || TREE_CODE (fn2
) != FUNCTION_DECL
)
28835 attr1
= DECL_FUNCTION_SPECIFIC_TARGET (fn1
);
28836 attr2
= DECL_FUNCTION_SPECIFIC_TARGET (fn2
);
28838 /* Atleast one function decl should have target attribute specified. */
28839 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
28842 if (attr1
== NULL_TREE
)
28843 attr1
= target_option_default_node
;
28844 else if (attr2
== NULL_TREE
)
28845 attr2
= target_option_default_node
;
28847 target1
= TREE_TARGET_OPTION (attr1
);
28848 target2
= TREE_TARGET_OPTION (attr2
);
28850 /* target1 and target2 must be different in some way. */
28851 if (target1
->x_ix86_isa_flags
== target2
->x_ix86_isa_flags
28852 && target1
->x_target_flags
== target2
->x_target_flags
28853 && target1
->arch
== target2
->arch
28854 && target1
->tune
== target2
->tune
28855 && target1
->x_ix86_fpmath
== target2
->x_ix86_fpmath
28856 && target1
->branch_cost
== target2
->branch_cost
)
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  Each element is a char* pointer;
   compare the strings they point to.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
28873 /* STR is the argument to target attribute. This function tokenizes
28874 the comma separated arguments, sorts them and returns a string which
28875 is a unique identifier for the comma separated arguments. It also
28876 replaces non-identifier characters "=,-" with "_". */
28879 sorted_attr_string (const char *str
)
28881 char **args
= NULL
;
28882 char *attr_str
, *ret_str
;
28884 unsigned int argnum
= 1;
28887 for (i
= 0; i
< strlen (str
); i
++)
28891 attr_str
= (char *)xmalloc (strlen (str
) + 1);
28892 strcpy (attr_str
, str
);
28894 /* Replace "=,-" with "_". */
28895 for (i
= 0; i
< strlen (attr_str
); i
++)
28896 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
28902 args
= XNEWVEC (char *, argnum
);
28905 attr
= strtok (attr_str
, ",");
28906 while (attr
!= NULL
)
28910 attr
= strtok (NULL
, ",");
28913 qsort (args
, argnum
, sizeof (char*), attr_strcmp
);
28915 ret_str
= (char *)xmalloc (strlen (str
) + 1);
28916 strcpy (ret_str
, args
[0]);
28917 for (i
= 1; i
< argnum
; i
++)
28919 strcat (ret_str
, "_");
28920 strcat (ret_str
, args
[i
]);
28928 /* This function changes the assembler name for functions that are
28929 versions. If DECL is a function version and has a "target"
28930 attribute, it appends the attribute string to its assembler name. */
28933 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
28936 const char *orig_name
, *version_string
, *attr_str
;
28937 char *assembler_name
;
28939 if (DECL_DECLARED_INLINE_P (decl
)
28940 && lookup_attribute ("gnu_inline",
28941 DECL_ATTRIBUTES (decl
)))
28942 error_at (DECL_SOURCE_LOCATION (decl
),
28943 "Function versions cannot be marked as gnu_inline,"
28944 " bodies have to be generated");
28946 if (DECL_VIRTUAL_P (decl
)
28947 || DECL_VINDEX (decl
))
28948 error_at (DECL_SOURCE_LOCATION (decl
),
28949 "Virtual function versioning not supported\n");
28951 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28953 /* target attribute string is NULL for default functions. */
28954 if (version_attr
== NULL_TREE
)
28957 orig_name
= IDENTIFIER_POINTER (id
);
28959 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
28961 attr_str
= sorted_attr_string (version_string
);
28962 assembler_name
= (char *) xmalloc (strlen (orig_name
)
28963 + strlen (attr_str
) + 2);
28965 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
28967 /* Allow assembler name to be modified if already set. */
28968 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
28969 SET_DECL_RTL (decl
, NULL
);
28971 return get_identifier (assembler_name
);
28975 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
28977 /* For function version, add the target suffix to the assembler name. */
28978 if (TREE_CODE (decl
) == FUNCTION_DECL
28979 && DECL_FUNCTION_VERSIONED (decl
))
28980 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
28981 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
28982 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
28988 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
28989 is true, append the full path name of the source file. */
28992 make_name (tree decl
, const char *suffix
, bool make_unique
)
28994 char *global_var_name
;
28997 const char *unique_name
= NULL
;
28999 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29001 /* Get a unique name that can be used globally without any chances
29002 of collision at link time. */
29004 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
29006 name_len
= strlen (name
) + strlen (suffix
) + 2;
29009 name_len
+= strlen (unique_name
) + 1;
29010 global_var_name
= XNEWVEC (char, name_len
);
29012 /* Use '.' to concatenate names as it is demangler friendly. */
29014 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
,
29015 unique_name
, suffix
);
29017 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29019 return global_var_name
;
29022 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29024 /* Make a dispatcher declaration for the multi-versioned function DECL.
29025 Calls to DECL function will be replaced with calls to the dispatcher
29026 by the front-end. Return the decl created. */
29029 make_dispatcher_decl (const tree decl
)
29032 char *func_name
, *resolver_name
;
29033 tree fn_type
, func_type
;
29034 bool is_uniq
= false;
29036 if (TREE_PUBLIC (decl
) == 0)
29039 func_name
= make_name (decl
, "ifunc", is_uniq
);
29040 resolver_name
= make_name (decl
, "resolver", is_uniq
);
29041 gcc_assert (resolver_name
);
29043 fn_type
= TREE_TYPE (decl
);
29044 func_type
= build_function_type (TREE_TYPE (fn_type
),
29045 TYPE_ARG_TYPES (fn_type
));
29047 func_decl
= build_fn_decl (func_name
, func_type
);
29048 TREE_USED (func_decl
) = 1;
29049 DECL_CONTEXT (func_decl
) = NULL_TREE
;
29050 DECL_INITIAL (func_decl
) = error_mark_node
;
29051 DECL_ARTIFICIAL (func_decl
) = 1;
29052 /* Mark this func as external, the resolver will flip it again if
29053 it gets generated. */
29054 DECL_EXTERNAL (func_decl
) = 1;
29055 /* This will be of type IFUNCs have to be externally visible. */
29056 TREE_PUBLIC (func_decl
) = 1;
29063 /* Returns true if decl is multi-versioned and DECL is the default function,
29064 that is it is not tagged with target specific optimization. */
29067 is_function_default_version (const tree decl
)
29069 return (TREE_CODE (decl
) == FUNCTION_DECL
29070 && DECL_FUNCTION_VERSIONED (decl
)
29071 && DECL_FUNCTION_SPECIFIC_TARGET (decl
) == NULL_TREE
);
29074 /* Make a dispatcher declaration for the multi-versioned function DECL.
29075 Calls to DECL function will be replaced with calls to the dispatcher
29076 by the front-end. Returns the decl of the dispatcher function. */
29079 ix86_get_function_versions_dispatcher (void *decl
)
29081 tree fn
= (tree
) decl
;
29082 struct cgraph_node
*node
= NULL
;
29083 struct cgraph_node
*default_node
= NULL
;
29084 struct cgraph_function_version_info
*node_v
= NULL
;
29085 struct cgraph_function_version_info
*first_v
= NULL
;
29087 tree dispatch_decl
= NULL
;
29089 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29090 struct cgraph_function_version_info
*it_v
= NULL
;
29091 struct cgraph_node
*dispatcher_node
= NULL
;
29092 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29095 struct cgraph_function_version_info
*default_version_info
= NULL
;
29097 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29099 node
= cgraph_get_node (fn
);
29100 gcc_assert (node
!= NULL
);
29102 node_v
= get_cgraph_node_version (node
);
29103 gcc_assert (node_v
!= NULL
);
29105 if (node_v
->dispatcher_resolver
!= NULL
)
29106 return node_v
->dispatcher_resolver
;
29108 /* Find the default version and make it the first node. */
29110 /* Go to the beginnig of the chain. */
29111 while (first_v
->prev
!= NULL
)
29112 first_v
= first_v
->prev
;
29113 default_version_info
= first_v
;
29114 while (default_version_info
!= NULL
)
29116 if (is_function_default_version
29117 (default_version_info
->this_node
->symbol
.decl
))
29119 default_version_info
= default_version_info
->next
;
29122 /* If there is no default node, just return NULL. */
29123 if (default_version_info
== NULL
)
29126 /* Make default info the first node. */
29127 if (first_v
!= default_version_info
)
29129 default_version_info
->prev
->next
= default_version_info
->next
;
29130 if (default_version_info
->next
)
29131 default_version_info
->next
->prev
= default_version_info
->prev
;
29132 first_v
->prev
= default_version_info
;
29133 default_version_info
->next
= first_v
;
29134 default_version_info
->prev
= NULL
;
29137 default_node
= default_version_info
->this_node
;
29139 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29140 /* Right now, the dispatching is done via ifunc. */
29141 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29143 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29144 gcc_assert (dispatcher_node
!= NULL
);
29145 dispatcher_node
->dispatcher_function
= 1;
29146 dispatcher_version_info
29147 = insert_new_cgraph_node_version (dispatcher_node
);
29148 dispatcher_version_info
->next
= default_version_info
;
29149 dispatcher_node
->local
.finalized
= 1;
29151 /* Set the dispatcher for all the versions. */
29152 it_v
= default_version_info
;
29153 while (it_v
->next
!= NULL
)
29155 it_v
->dispatcher_resolver
= dispatch_decl
;
29159 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29160 "multiversioning needs ifunc which is not supported "
29161 "in this configuration");
29163 return dispatch_decl
;
29166 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
29170 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29173 tree attr_arg_name
;
29177 attr_name
= get_identifier (name
);
29178 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
29179 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
29180 attr
= tree_cons (attr_name
, attr_args
, chain
);
29184 /* Make the resolver function decl to dispatch the versions of
29185 a multi-versioned function, DEFAULT_DECL. Create an
29186 empty basic block in the resolver and store the pointer in
29187 EMPTY_BB. Return the decl of the resolver function. */
29190 make_resolver_func (const tree default_decl
,
29191 const tree dispatch_decl
,
29192 basic_block
*empty_bb
)
29194 char *resolver_name
;
29195 tree decl
, type
, decl_name
, t
;
29196 bool is_uniq
= false;
29198 /* IFUNC's have to be globally visible. So, if the default_decl is
29199 not, then the name of the IFUNC should be made unique. */
29200 if (TREE_PUBLIC (default_decl
) == 0)
29203 /* Append the filename to the resolver function if the versions are
29204 not externally visible. This is because the resolver function has
29205 to be externally visible for the loader to find it. So, appending
29206 the filename will prevent conflicts with a resolver function from
29207 another module which is based on the same version name. */
29208 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29210 /* The resolver function should return a (void *). */
29211 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29213 decl
= build_fn_decl (resolver_name
, type
);
29214 decl_name
= get_identifier (resolver_name
);
29215 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29217 DECL_NAME (decl
) = decl_name
;
29218 TREE_USED (decl
) = 1;
29219 DECL_ARTIFICIAL (decl
) = 1;
29220 DECL_IGNORED_P (decl
) = 0;
29221 /* IFUNC resolvers have to be externally visible. */
29222 TREE_PUBLIC (decl
) = 1;
29223 DECL_UNINLINABLE (decl
) = 0;
29225 /* Resolver is not external, body is generated. */
29226 DECL_EXTERNAL (decl
) = 0;
29227 DECL_EXTERNAL (dispatch_decl
) = 0;
29229 DECL_CONTEXT (decl
) = NULL_TREE
;
29230 DECL_INITIAL (decl
) = make_node (BLOCK
);
29231 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29233 if (DECL_COMDAT_GROUP (default_decl
)
29234 || TREE_PUBLIC (default_decl
))
29236 /* In this case, each translation unit with a call to this
29237 versioned function will put out a resolver. Ensure it
29238 is comdat to keep just one copy. */
29239 DECL_COMDAT (decl
) = 1;
29240 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29242 /* Build result decl and add to function_decl. */
29243 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29244 DECL_ARTIFICIAL (t
) = 1;
29245 DECL_IGNORED_P (t
) = 1;
29246 DECL_RESULT (decl
) = t
;
29248 gimplify_function_tree (decl
);
29249 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29250 *empty_bb
= init_lowered_empty_function (decl
, false);
29252 cgraph_add_new_function (decl
, true);
29253 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29257 gcc_assert (dispatch_decl
!= NULL
);
29258 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29259 DECL_ATTRIBUTES (dispatch_decl
)
29260 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29262 /* Create the alias for dispatch to resolver here. */
29263 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29264 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29268 /* Generate the dispatching code body to dispatch multi-versioned function
29269 DECL. The target hook is called to process the "target" attributes and
29270 provide the code to dispatch the right function at run-time. NODE points
29271 to the dispatcher decl whose body will be created. */
29274 ix86_generate_version_dispatcher_body (void *node_p
)
29276 tree resolver_decl
;
29277 basic_block empty_bb
;
29278 vec
<tree
> fn_ver_vec
= vec
<tree
>();
29279 tree default_ver_decl
;
29280 struct cgraph_node
*versn
;
29281 struct cgraph_node
*node
;
29283 struct cgraph_function_version_info
*node_version_info
= NULL
;
29284 struct cgraph_function_version_info
*versn_info
= NULL
;
29286 node
= (cgraph_node
*)node_p
;
29288 node_version_info
= get_cgraph_node_version (node
);
29289 gcc_assert (node
->dispatcher_function
29290 && node_version_info
!= NULL
);
29292 if (node_version_info
->dispatcher_resolver
)
29293 return node_version_info
->dispatcher_resolver
;
29295 /* The first version in the chain corresponds to the default version. */
29296 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
29298 /* node is going to be an alias, so remove the finalized bit. */
29299 node
->local
.finalized
= false;
29301 resolver_decl
= make_resolver_func (default_ver_decl
,
29302 node
->symbol
.decl
, &empty_bb
);
29304 node_version_info
->dispatcher_resolver
= resolver_decl
;
29306 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
29308 fn_ver_vec
.create (2);
29310 for (versn_info
= node_version_info
->next
; versn_info
;
29311 versn_info
= versn_info
->next
)
29313 versn
= versn_info
->this_node
;
29314 /* Check for virtual functions here again, as by this time it should
29315 have been determined if this function needs a vtable index or
29316 not. This happens for methods in derived classes that override
29317 virtual methods in base classes but are not explicitly marked as
29319 if (DECL_VINDEX (versn
->symbol
.decl
))
29320 error_at (DECL_SOURCE_LOCATION (versn
->symbol
.decl
),
29321 "Virtual function multiversioning not supported");
29322 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
29325 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
29327 rebuild_cgraph_edges ();
29329 return resolver_decl
;
29331 /* This builds the processor_model struct type defined in
29332 libgcc/config/i386/cpuinfo.c */
29335 build_processor_model_struct (void)
29337 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29339 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29341 tree type
= make_node (RECORD_TYPE
);
29343 /* The first 3 fields are unsigned int. */
29344 for (i
= 0; i
< 3; ++i
)
29346 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29347 get_identifier (field_name
[i
]), unsigned_type_node
);
29348 if (field_chain
!= NULL_TREE
)
29349 DECL_CHAIN (field
) = field_chain
;
29350 field_chain
= field
;
29353 /* The last field is an array of unsigned integers of size one. */
29354 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29355 get_identifier (field_name
[3]),
29356 build_array_type (unsigned_type_node
,
29357 build_index_type (size_one_node
)));
29358 if (field_chain
!= NULL_TREE
)
29359 DECL_CHAIN (field
) = field_chain
;
29360 field_chain
= field
;
29362 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29366 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
29369 make_var_decl (tree type
, const char *name
)
29373 new_decl
= build_decl (UNKNOWN_LOCATION
,
29375 get_identifier(name
),
29378 DECL_EXTERNAL (new_decl
) = 1;
29379 TREE_STATIC (new_decl
) = 1;
29380 TREE_PUBLIC (new_decl
) = 1;
29381 DECL_INITIAL (new_decl
) = 0;
29382 DECL_ARTIFICIAL (new_decl
) = 0;
29383 DECL_PRESERVE_P (new_decl
) = 1;
29385 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29386 assemble_variable (new_decl
, 0, 0, 0);
29391 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29392 into an integer defined in libgcc/config/i386/cpuinfo.c */
29395 fold_builtin_cpu (tree fndecl
, tree
*args
)
29398 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29399 DECL_FUNCTION_CODE (fndecl
);
29400 tree param_string_cst
= NULL
;
29402 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29403 enum processor_features
29419 /* These are the values for vendor types and cpu types and subtypes
29420 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29421 the corresponding start value. */
29422 enum processor_model
29432 M_CPU_SUBTYPE_START
,
29433 M_INTEL_COREI7_NEHALEM
,
29434 M_INTEL_COREI7_WESTMERE
,
29435 M_INTEL_COREI7_SANDYBRIDGE
,
29436 M_AMDFAM10H_BARCELONA
,
29437 M_AMDFAM10H_SHANGHAI
,
29438 M_AMDFAM10H_ISTANBUL
,
29439 M_AMDFAM15H_BDVER1
,
29440 M_AMDFAM15H_BDVER2
,
29444 static struct _arch_names_table
29446 const char *const name
;
29447 const enum processor_model model
;
29449 const arch_names_table
[] =
29452 {"intel", M_INTEL
},
29453 {"atom", M_INTEL_ATOM
},
29454 {"core2", M_INTEL_CORE2
},
29455 {"corei7", M_INTEL_COREI7
},
29456 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29457 {"westmere", M_INTEL_COREI7_WESTMERE
},
29458 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29459 {"amdfam10h", M_AMDFAM10H
},
29460 {"barcelona", M_AMDFAM10H_BARCELONA
},
29461 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29462 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29463 {"amdfam15h", M_AMDFAM15H
},
29464 {"bdver1", M_AMDFAM15H_BDVER1
},
29465 {"bdver2", M_AMDFAM15H_BDVER2
},
29466 {"bdver3", M_AMDFAM15H_BDVER3
},
29469 static struct _isa_names_table
29471 const char *const name
;
29472 const enum processor_features feature
;
29474 const isa_names_table
[] =
29478 {"popcnt", F_POPCNT
},
29482 {"ssse3", F_SSSE3
},
29483 {"sse4.1", F_SSE4_1
},
29484 {"sse4.2", F_SSE4_2
},
29489 static tree __processor_model_type
= NULL_TREE
;
29490 static tree __cpu_model_var
= NULL_TREE
;
29492 if (__processor_model_type
== NULL_TREE
)
29493 __processor_model_type
= build_processor_model_struct ();
29495 if (__cpu_model_var
== NULL_TREE
)
29496 __cpu_model_var
= make_var_decl (__processor_model_type
,
29499 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29501 param_string_cst
= *args
;
29502 while (param_string_cst
29503 && TREE_CODE (param_string_cst
) != STRING_CST
)
29505 /* *args must be a expr that can contain other EXPRS leading to a
29507 if (!EXPR_P (param_string_cst
))
29509 error ("Parameter to builtin must be a string constant or literal");
29510 return integer_zero_node
;
29512 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29515 gcc_assert (param_string_cst
);
29517 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29523 unsigned int field_val
= 0;
29524 unsigned int NUM_ARCH_NAMES
29525 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
29527 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29528 if (strcmp (arch_names_table
[i
].name
,
29529 TREE_STRING_POINTER (param_string_cst
)) == 0)
29532 if (i
== NUM_ARCH_NAMES
)
29534 error ("Parameter to builtin not valid: %s",
29535 TREE_STRING_POINTER (param_string_cst
));
29536 return integer_zero_node
;
29539 field
= TYPE_FIELDS (__processor_model_type
);
29540 field_val
= arch_names_table
[i
].model
;
29542 /* CPU types are stored in the next field. */
29543 if (field_val
> M_CPU_TYPE_START
29544 && field_val
< M_CPU_SUBTYPE_START
)
29546 field
= DECL_CHAIN (field
);
29547 field_val
-= M_CPU_TYPE_START
;
29550 /* CPU subtypes are stored in the next field. */
29551 if (field_val
> M_CPU_SUBTYPE_START
)
29553 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29554 field_val
-= M_CPU_SUBTYPE_START
;
29557 /* Get the appropriate field in __cpu_model. */
29558 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29561 /* Check the value. */
29562 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29563 build_int_cstu (unsigned_type_node
, field_val
));
29564 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29566 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29573 unsigned int field_val
= 0;
29574 unsigned int NUM_ISA_NAMES
29575 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29577 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29578 if (strcmp (isa_names_table
[i
].name
,
29579 TREE_STRING_POINTER (param_string_cst
)) == 0)
29582 if (i
== NUM_ISA_NAMES
)
29584 error ("Parameter to builtin not valid: %s",
29585 TREE_STRING_POINTER (param_string_cst
));
29586 return integer_zero_node
;
29589 field
= TYPE_FIELDS (__processor_model_type
);
29590 /* Get the last field, which is __cpu_features. */
29591 while (DECL_CHAIN (field
))
29592 field
= DECL_CHAIN (field
);
29594 /* Get the appropriate field: __cpu_model.__cpu_features */
29595 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29598 /* Access the 0th element of __cpu_features array. */
29599 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29600 integer_zero_node
, NULL_TREE
, NULL_TREE
);
29602 field_val
= (1 << isa_names_table
[i
].feature
);
29603 /* Return __cpu_model.__cpu_features[0] & field_val */
29604 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29605 build_int_cstu (unsigned_type_node
, field_val
));
29606 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29608 gcc_unreachable ();
29612 ix86_fold_builtin (tree fndecl
, int n_args
,
29613 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29615 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29617 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29618 DECL_FUNCTION_CODE (fndecl
);
29619 if (fn_code
== IX86_BUILTIN_CPU_IS
29620 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29622 gcc_assert (n_args
== 1);
29623 return fold_builtin_cpu (fndecl
, args
);
29627 #ifdef SUBTARGET_FOLD_BUILTIN
29628 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29634 /* Make builtins to detect cpu type and features supported. NAME is
29635 the builtin name, CODE is the builtin code, and FTYPE is the function
29636 type of the builtin. */
29639 make_cpu_type_builtin (const char* name
, int code
,
29640 enum ix86_builtin_func_type ftype
, bool is_const
)
29645 type
= ix86_get_builtin_func_type (ftype
);
29646 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29648 gcc_assert (decl
!= NULL_TREE
);
29649 ix86_builtins
[(int) code
] = decl
;
29650 TREE_READONLY (decl
) = is_const
;
29653 /* Make builtins to get CPU type and features supported. The created
29656 __builtin_cpu_init (), to detect cpu type and features,
29657 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29658 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
29662 ix86_init_platform_type_builtins (void)
29664 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29665 INT_FTYPE_VOID
, false);
29666 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29667 INT_FTYPE_PCCHAR
, true);
29668 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29669 INT_FTYPE_PCCHAR
, true);
29672 /* Internal method for ix86_init_builtins. */
29675 ix86_init_builtins_va_builtins_abi (void)
29677 tree ms_va_ref
, sysv_va_ref
;
29678 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29679 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29680 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29681 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
29685 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29686 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29687 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29689 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29692 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29693 fnvoid_va_start_ms
=
29694 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29695 fnvoid_va_end_sysv
=
29696 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29697 fnvoid_va_start_sysv
=
29698 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29700 fnvoid_va_copy_ms
=
29701 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29703 fnvoid_va_copy_sysv
=
29704 build_function_type_list (void_type_node
, sysv_va_ref
,
29705 sysv_va_ref
, NULL_TREE
);
29707 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29708 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29709 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29710 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29711 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29712 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29713 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29714 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29715 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29716 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29717 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29718 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29722 ix86_init_builtin_types (void)
29724 tree float128_type_node
, float80_type_node
;
29726 /* The __float80 type. */
29727 float80_type_node
= long_double_type_node
;
29728 if (TYPE_MODE (float80_type_node
) != XFmode
)
29730 /* The __float80 type. */
29731 float80_type_node
= make_node (REAL_TYPE
);
29733 TYPE_PRECISION (float80_type_node
) = 80;
29734 layout_type (float80_type_node
);
29736 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29738 /* The __float128 type. */
29739 float128_type_node
= make_node (REAL_TYPE
);
29740 TYPE_PRECISION (float128_type_node
) = 128;
29741 layout_type (float128_type_node
);
29742 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29744 /* This macro is built by i386-builtin-types.awk. */
29745 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
29749 ix86_init_builtins (void)
29753 ix86_init_builtin_types ();
29755 /* Builtins to get CPU type and features. */
29756 ix86_init_platform_type_builtins ();
29758 /* TFmode support builtins. */
29759 def_builtin_const (0, "__builtin_infq",
29760 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29761 def_builtin_const (0, "__builtin_huge_valq",
29762 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29764 /* We will expand them to normal call if SSE isn't available since
29765 they are used by libgcc. */
29766 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29767 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29768 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29769 TREE_READONLY (t
) = 1;
29770 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29772 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29773 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29774 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29775 TREE_READONLY (t
) = 1;
29776 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29778 ix86_init_tm_builtins ();
29779 ix86_init_mmx_sse_builtins ();
29782 ix86_init_builtins_va_builtins_abi ();
29784 #ifdef SUBTARGET_INIT_BUILTINS
29785 SUBTARGET_INIT_BUILTINS
;
29789 /* Return the ix86 builtin for CODE. */
29792 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29794 if (code
>= IX86_BUILTIN_MAX
)
29795 return error_mark_node
;
29797 return ix86_builtins
[code
];
29800 /* Errors in the source file can cause expand_expr to return const0_rtx
29801 where we expect a vector. To avoid crashing, use one of the vector
29802 clear instructions. */
29804 safe_vector_operand (rtx x
, enum machine_mode mode
)
29806 if (x
== const0_rtx
)
29807 x
= CONST0_RTX (mode
);
29811 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
29814 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
29817 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29818 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29819 rtx op0
= expand_normal (arg0
);
29820 rtx op1
= expand_normal (arg1
);
29821 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29822 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
29823 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
29825 if (VECTOR_MODE_P (mode0
))
29826 op0
= safe_vector_operand (op0
, mode0
);
29827 if (VECTOR_MODE_P (mode1
))
29828 op1
= safe_vector_operand (op1
, mode1
);
29830 if (optimize
|| !target
29831 || GET_MODE (target
) != tmode
29832 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29833 target
= gen_reg_rtx (tmode
);
29835 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
29837 rtx x
= gen_reg_rtx (V4SImode
);
29838 emit_insn (gen_sse2_loadd (x
, op1
));
29839 op1
= gen_lowpart (TImode
, x
);
29842 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
29843 op0
= copy_to_mode_reg (mode0
, op0
);
29844 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
29845 op1
= copy_to_mode_reg (mode1
, op1
);
29847 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
29856 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
29859 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
29860 enum ix86_builtin_func_type m_type
,
29861 enum rtx_code sub_code
)
29866 bool comparison_p
= false;
29868 bool last_arg_constant
= false;
29869 int num_memory
= 0;
29872 enum machine_mode mode
;
29875 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29879 case MULTI_ARG_4_DF2_DI_I
:
29880 case MULTI_ARG_4_DF2_DI_I1
:
29881 case MULTI_ARG_4_SF2_SI_I
:
29882 case MULTI_ARG_4_SF2_SI_I1
:
29884 last_arg_constant
= true;
29887 case MULTI_ARG_3_SF
:
29888 case MULTI_ARG_3_DF
:
29889 case MULTI_ARG_3_SF2
:
29890 case MULTI_ARG_3_DF2
:
29891 case MULTI_ARG_3_DI
:
29892 case MULTI_ARG_3_SI
:
29893 case MULTI_ARG_3_SI_DI
:
29894 case MULTI_ARG_3_HI
:
29895 case MULTI_ARG_3_HI_SI
:
29896 case MULTI_ARG_3_QI
:
29897 case MULTI_ARG_3_DI2
:
29898 case MULTI_ARG_3_SI2
:
29899 case MULTI_ARG_3_HI2
:
29900 case MULTI_ARG_3_QI2
:
29904 case MULTI_ARG_2_SF
:
29905 case MULTI_ARG_2_DF
:
29906 case MULTI_ARG_2_DI
:
29907 case MULTI_ARG_2_SI
:
29908 case MULTI_ARG_2_HI
:
29909 case MULTI_ARG_2_QI
:
29913 case MULTI_ARG_2_DI_IMM
:
29914 case MULTI_ARG_2_SI_IMM
:
29915 case MULTI_ARG_2_HI_IMM
:
29916 case MULTI_ARG_2_QI_IMM
:
29918 last_arg_constant
= true;
29921 case MULTI_ARG_1_SF
:
29922 case MULTI_ARG_1_DF
:
29923 case MULTI_ARG_1_SF2
:
29924 case MULTI_ARG_1_DF2
:
29925 case MULTI_ARG_1_DI
:
29926 case MULTI_ARG_1_SI
:
29927 case MULTI_ARG_1_HI
:
29928 case MULTI_ARG_1_QI
:
29929 case MULTI_ARG_1_SI_DI
:
29930 case MULTI_ARG_1_HI_DI
:
29931 case MULTI_ARG_1_HI_SI
:
29932 case MULTI_ARG_1_QI_DI
:
29933 case MULTI_ARG_1_QI_SI
:
29934 case MULTI_ARG_1_QI_HI
:
29938 case MULTI_ARG_2_DI_CMP
:
29939 case MULTI_ARG_2_SI_CMP
:
29940 case MULTI_ARG_2_HI_CMP
:
29941 case MULTI_ARG_2_QI_CMP
:
29943 comparison_p
= true;
29946 case MULTI_ARG_2_SF_TF
:
29947 case MULTI_ARG_2_DF_TF
:
29948 case MULTI_ARG_2_DI_TF
:
29949 case MULTI_ARG_2_SI_TF
:
29950 case MULTI_ARG_2_HI_TF
:
29951 case MULTI_ARG_2_QI_TF
:
29957 gcc_unreachable ();
29960 if (optimize
|| !target
29961 || GET_MODE (target
) != tmode
29962 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29963 target
= gen_reg_rtx (tmode
);
29965 gcc_assert (nargs
<= 4);
29967 for (i
= 0; i
< nargs
; i
++)
29969 tree arg
= CALL_EXPR_ARG (exp
, i
);
29970 rtx op
= expand_normal (arg
);
29971 int adjust
= (comparison_p
) ? 1 : 0;
29972 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
29974 if (last_arg_constant
&& i
== nargs
- 1)
29976 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
29978 enum insn_code new_icode
= icode
;
29981 case CODE_FOR_xop_vpermil2v2df3
:
29982 case CODE_FOR_xop_vpermil2v4sf3
:
29983 case CODE_FOR_xop_vpermil2v4df3
:
29984 case CODE_FOR_xop_vpermil2v8sf3
:
29985 error ("the last argument must be a 2-bit immediate");
29986 return gen_reg_rtx (tmode
);
29987 case CODE_FOR_xop_rotlv2di3
:
29988 new_icode
= CODE_FOR_rotlv2di3
;
29990 case CODE_FOR_xop_rotlv4si3
:
29991 new_icode
= CODE_FOR_rotlv4si3
;
29993 case CODE_FOR_xop_rotlv8hi3
:
29994 new_icode
= CODE_FOR_rotlv8hi3
;
29996 case CODE_FOR_xop_rotlv16qi3
:
29997 new_icode
= CODE_FOR_rotlv16qi3
;
29999 if (CONST_INT_P (op
))
30001 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30002 op
= GEN_INT (INTVAL (op
) & mask
);
30003 gcc_checking_assert
30004 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30008 gcc_checking_assert
30010 && insn_data
[new_icode
].operand
[0].mode
== tmode
30011 && insn_data
[new_icode
].operand
[1].mode
== tmode
30012 && insn_data
[new_icode
].operand
[2].mode
== mode
30013 && insn_data
[new_icode
].operand
[0].predicate
30014 == insn_data
[icode
].operand
[0].predicate
30015 && insn_data
[new_icode
].operand
[1].predicate
30016 == insn_data
[icode
].operand
[1].predicate
);
30022 gcc_unreachable ();
30029 if (VECTOR_MODE_P (mode
))
30030 op
= safe_vector_operand (op
, mode
);
30032 /* If we aren't optimizing, only allow one memory operand to be
30034 if (memory_operand (op
, mode
))
30037 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30040 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30042 op
= force_reg (mode
, op
);
30046 args
[i
].mode
= mode
;
30052 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30057 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30058 GEN_INT ((int)sub_code
));
30059 else if (! comparison_p
)
30060 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30063 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30067 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30072 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30076 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30080 gcc_unreachable ();
30090 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30091 insns with vec_merge. */
30094 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30098 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30099 rtx op1
, op0
= expand_normal (arg0
);
30100 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30101 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30103 if (optimize
|| !target
30104 || GET_MODE (target
) != tmode
30105 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30106 target
= gen_reg_rtx (tmode
);
30108 if (VECTOR_MODE_P (mode0
))
30109 op0
= safe_vector_operand (op0
, mode0
);
30111 if ((optimize
&& !register_operand (op0
, mode0
))
30112 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30113 op0
= copy_to_mode_reg (mode0
, op0
);
30116 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30117 op1
= copy_to_mode_reg (mode0
, op1
);
30119 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30126 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30129 ix86_expand_sse_compare (const struct builtin_description
*d
,
30130 tree exp
, rtx target
, bool swap
)
30133 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30134 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30135 rtx op0
= expand_normal (arg0
);
30136 rtx op1
= expand_normal (arg1
);
30138 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30139 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30140 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30141 enum rtx_code comparison
= d
->comparison
;
30143 if (VECTOR_MODE_P (mode0
))
30144 op0
= safe_vector_operand (op0
, mode0
);
30145 if (VECTOR_MODE_P (mode1
))
30146 op1
= safe_vector_operand (op1
, mode1
);
30148 /* Swap operands if we have a comparison that isn't available in
30152 rtx tmp
= gen_reg_rtx (mode1
);
30153 emit_move_insn (tmp
, op1
);
30158 if (optimize
|| !target
30159 || GET_MODE (target
) != tmode
30160 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30161 target
= gen_reg_rtx (tmode
);
30163 if ((optimize
&& !register_operand (op0
, mode0
))
30164 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30165 op0
= copy_to_mode_reg (mode0
, op0
);
30166 if ((optimize
&& !register_operand (op1
, mode1
))
30167 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30168 op1
= copy_to_mode_reg (mode1
, op1
);
30170 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30171 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30178 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30181 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30185 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30186 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30187 rtx op0
= expand_normal (arg0
);
30188 rtx op1
= expand_normal (arg1
);
30189 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30190 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30191 enum rtx_code comparison
= d
->comparison
;
30193 if (VECTOR_MODE_P (mode0
))
30194 op0
= safe_vector_operand (op0
, mode0
);
30195 if (VECTOR_MODE_P (mode1
))
30196 op1
= safe_vector_operand (op1
, mode1
);
30198 /* Swap operands if we have a comparison that isn't available in
30200 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30207 target
= gen_reg_rtx (SImode
);
30208 emit_move_insn (target
, const0_rtx
);
30209 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30211 if ((optimize
&& !register_operand (op0
, mode0
))
30212 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30213 op0
= copy_to_mode_reg (mode0
, op0
);
30214 if ((optimize
&& !register_operand (op1
, mode1
))
30215 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30216 op1
= copy_to_mode_reg (mode1
, op1
);
30218 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30222 emit_insn (gen_rtx_SET (VOIDmode
,
30223 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30224 gen_rtx_fmt_ee (comparison
, QImode
,
30228 return SUBREG_REG (target
);
30231 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30234 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30238 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30239 rtx op1
, op0
= expand_normal (arg0
);
30240 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30241 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30243 if (optimize
|| target
== 0
30244 || GET_MODE (target
) != tmode
30245 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30246 target
= gen_reg_rtx (tmode
);
30248 if (VECTOR_MODE_P (mode0
))
30249 op0
= safe_vector_operand (op0
, mode0
);
30251 if ((optimize
&& !register_operand (op0
, mode0
))
30252 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30253 op0
= copy_to_mode_reg (mode0
, op0
);
30255 op1
= GEN_INT (d
->comparison
);
30257 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30265 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30266 tree exp
, rtx target
)
30269 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30270 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30271 rtx op0
= expand_normal (arg0
);
30272 rtx op1
= expand_normal (arg1
);
30274 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30275 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30276 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30278 if (optimize
|| target
== 0
30279 || GET_MODE (target
) != tmode
30280 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30281 target
= gen_reg_rtx (tmode
);
30283 op0
= safe_vector_operand (op0
, mode0
);
30284 op1
= safe_vector_operand (op1
, mode1
);
30286 if ((optimize
&& !register_operand (op0
, mode0
))
30287 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30288 op0
= copy_to_mode_reg (mode0
, op0
);
30289 if ((optimize
&& !register_operand (op1
, mode1
))
30290 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30291 op1
= copy_to_mode_reg (mode1
, op1
);
30293 op2
= GEN_INT (d
->comparison
);
30295 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30302 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30305 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30309 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30310 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30311 rtx op0
= expand_normal (arg0
);
30312 rtx op1
= expand_normal (arg1
);
30313 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30314 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30315 enum rtx_code comparison
= d
->comparison
;
30317 if (VECTOR_MODE_P (mode0
))
30318 op0
= safe_vector_operand (op0
, mode0
);
30319 if (VECTOR_MODE_P (mode1
))
30320 op1
= safe_vector_operand (op1
, mode1
);
30322 target
= gen_reg_rtx (SImode
);
30323 emit_move_insn (target
, const0_rtx
);
30324 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30326 if ((optimize
&& !register_operand (op0
, mode0
))
30327 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30328 op0
= copy_to_mode_reg (mode0
, op0
);
30329 if ((optimize
&& !register_operand (op1
, mode1
))
30330 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30331 op1
= copy_to_mode_reg (mode1
, op1
);
30333 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30337 emit_insn (gen_rtx_SET (VOIDmode
,
30338 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30339 gen_rtx_fmt_ee (comparison
, QImode
,
30343 return SUBREG_REG (target
);
30346 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30349 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30350 tree exp
, rtx target
)
30353 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30354 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30355 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30356 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30357 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30358 rtx scratch0
, scratch1
;
30359 rtx op0
= expand_normal (arg0
);
30360 rtx op1
= expand_normal (arg1
);
30361 rtx op2
= expand_normal (arg2
);
30362 rtx op3
= expand_normal (arg3
);
30363 rtx op4
= expand_normal (arg4
);
30364 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30366 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30367 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30368 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30369 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30370 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30371 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30372 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30374 if (VECTOR_MODE_P (modev2
))
30375 op0
= safe_vector_operand (op0
, modev2
);
30376 if (VECTOR_MODE_P (modev4
))
30377 op2
= safe_vector_operand (op2
, modev4
);
30379 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30380 op0
= copy_to_mode_reg (modev2
, op0
);
30381 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30382 op1
= copy_to_mode_reg (modei3
, op1
);
30383 if ((optimize
&& !register_operand (op2
, modev4
))
30384 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30385 op2
= copy_to_mode_reg (modev4
, op2
);
30386 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30387 op3
= copy_to_mode_reg (modei5
, op3
);
30389 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30391 error ("the fifth argument must be an 8-bit immediate");
30395 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30397 if (optimize
|| !target
30398 || GET_MODE (target
) != tmode0
30399 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30400 target
= gen_reg_rtx (tmode0
);
30402 scratch1
= gen_reg_rtx (tmode1
);
30404 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30406 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30408 if (optimize
|| !target
30409 || GET_MODE (target
) != tmode1
30410 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30411 target
= gen_reg_rtx (tmode1
);
30413 scratch0
= gen_reg_rtx (tmode0
);
30415 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30419 gcc_assert (d
->flag
);
30421 scratch0
= gen_reg_rtx (tmode0
);
30422 scratch1
= gen_reg_rtx (tmode1
);
30424 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30434 target
= gen_reg_rtx (SImode
);
30435 emit_move_insn (target
, const0_rtx
);
30436 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30439 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30440 gen_rtx_fmt_ee (EQ
, QImode
,
30441 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30444 return SUBREG_REG (target
);
30451 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30454 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30455 tree exp
, rtx target
)
30458 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30459 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30460 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30461 rtx scratch0
, scratch1
;
30462 rtx op0
= expand_normal (arg0
);
30463 rtx op1
= expand_normal (arg1
);
30464 rtx op2
= expand_normal (arg2
);
30465 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30467 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30468 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30469 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30470 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30471 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30473 if (VECTOR_MODE_P (modev2
))
30474 op0
= safe_vector_operand (op0
, modev2
);
30475 if (VECTOR_MODE_P (modev3
))
30476 op1
= safe_vector_operand (op1
, modev3
);
30478 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30479 op0
= copy_to_mode_reg (modev2
, op0
);
30480 if ((optimize
&& !register_operand (op1
, modev3
))
30481 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30482 op1
= copy_to_mode_reg (modev3
, op1
);
30484 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30486 error ("the third argument must be an 8-bit immediate");
30490 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30492 if (optimize
|| !target
30493 || GET_MODE (target
) != tmode0
30494 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30495 target
= gen_reg_rtx (tmode0
);
30497 scratch1
= gen_reg_rtx (tmode1
);
30499 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30501 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30503 if (optimize
|| !target
30504 || GET_MODE (target
) != tmode1
30505 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30506 target
= gen_reg_rtx (tmode1
);
30508 scratch0
= gen_reg_rtx (tmode0
);
30510 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30514 gcc_assert (d
->flag
);
30516 scratch0
= gen_reg_rtx (tmode0
);
30517 scratch1
= gen_reg_rtx (tmode1
);
30519 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30529 target
= gen_reg_rtx (SImode
);
30530 emit_move_insn (target
, const0_rtx
);
30531 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30534 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30535 gen_rtx_fmt_ee (EQ
, QImode
,
30536 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30539 return SUBREG_REG (target
);
30545 /* Subroutine of ix86_expand_builtin to take care of insns with
30546 variable number of operands. */
30549 ix86_expand_args_builtin (const struct builtin_description
*d
,
30550 tree exp
, rtx target
)
30552 rtx pat
, real_target
;
30553 unsigned int i
, nargs
;
30554 unsigned int nargs_constant
= 0;
30555 int num_memory
= 0;
30559 enum machine_mode mode
;
30561 bool last_arg_count
= false;
30562 enum insn_code icode
= d
->icode
;
30563 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30564 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30565 enum machine_mode rmode
= VOIDmode
;
30567 enum rtx_code comparison
= d
->comparison
;
30569 switch ((enum ix86_builtin_func_type
) d
->flag
)
30571 case V2DF_FTYPE_V2DF_ROUND
:
30572 case V4DF_FTYPE_V4DF_ROUND
:
30573 case V4SF_FTYPE_V4SF_ROUND
:
30574 case V8SF_FTYPE_V8SF_ROUND
:
30575 case V4SI_FTYPE_V4SF_ROUND
:
30576 case V8SI_FTYPE_V8SF_ROUND
:
30577 return ix86_expand_sse_round (d
, exp
, target
);
30578 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30579 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30580 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30581 case INT_FTYPE_V8SF_V8SF_PTEST
:
30582 case INT_FTYPE_V4DI_V4DI_PTEST
:
30583 case INT_FTYPE_V4DF_V4DF_PTEST
:
30584 case INT_FTYPE_V4SF_V4SF_PTEST
:
30585 case INT_FTYPE_V2DI_V2DI_PTEST
:
30586 case INT_FTYPE_V2DF_V2DF_PTEST
:
30587 return ix86_expand_sse_ptest (d
, exp
, target
);
30588 case FLOAT128_FTYPE_FLOAT128
:
30589 case FLOAT_FTYPE_FLOAT
:
30590 case INT_FTYPE_INT
:
30591 case UINT64_FTYPE_INT
:
30592 case UINT16_FTYPE_UINT16
:
30593 case INT64_FTYPE_INT64
:
30594 case INT64_FTYPE_V4SF
:
30595 case INT64_FTYPE_V2DF
:
30596 case INT_FTYPE_V16QI
:
30597 case INT_FTYPE_V8QI
:
30598 case INT_FTYPE_V8SF
:
30599 case INT_FTYPE_V4DF
:
30600 case INT_FTYPE_V4SF
:
30601 case INT_FTYPE_V2DF
:
30602 case INT_FTYPE_V32QI
:
30603 case V16QI_FTYPE_V16QI
:
30604 case V8SI_FTYPE_V8SF
:
30605 case V8SI_FTYPE_V4SI
:
30606 case V8HI_FTYPE_V8HI
:
30607 case V8HI_FTYPE_V16QI
:
30608 case V8QI_FTYPE_V8QI
:
30609 case V8SF_FTYPE_V8SF
:
30610 case V8SF_FTYPE_V8SI
:
30611 case V8SF_FTYPE_V4SF
:
30612 case V8SF_FTYPE_V8HI
:
30613 case V4SI_FTYPE_V4SI
:
30614 case V4SI_FTYPE_V16QI
:
30615 case V4SI_FTYPE_V4SF
:
30616 case V4SI_FTYPE_V8SI
:
30617 case V4SI_FTYPE_V8HI
:
30618 case V4SI_FTYPE_V4DF
:
30619 case V4SI_FTYPE_V2DF
:
30620 case V4HI_FTYPE_V4HI
:
30621 case V4DF_FTYPE_V4DF
:
30622 case V4DF_FTYPE_V4SI
:
30623 case V4DF_FTYPE_V4SF
:
30624 case V4DF_FTYPE_V2DF
:
30625 case V4SF_FTYPE_V4SF
:
30626 case V4SF_FTYPE_V4SI
:
30627 case V4SF_FTYPE_V8SF
:
30628 case V4SF_FTYPE_V4DF
:
30629 case V4SF_FTYPE_V8HI
:
30630 case V4SF_FTYPE_V2DF
:
30631 case V2DI_FTYPE_V2DI
:
30632 case V2DI_FTYPE_V16QI
:
30633 case V2DI_FTYPE_V8HI
:
30634 case V2DI_FTYPE_V4SI
:
30635 case V2DF_FTYPE_V2DF
:
30636 case V2DF_FTYPE_V4SI
:
30637 case V2DF_FTYPE_V4DF
:
30638 case V2DF_FTYPE_V4SF
:
30639 case V2DF_FTYPE_V2SI
:
30640 case V2SI_FTYPE_V2SI
:
30641 case V2SI_FTYPE_V4SF
:
30642 case V2SI_FTYPE_V2SF
:
30643 case V2SI_FTYPE_V2DF
:
30644 case V2SF_FTYPE_V2SF
:
30645 case V2SF_FTYPE_V2SI
:
30646 case V32QI_FTYPE_V32QI
:
30647 case V32QI_FTYPE_V16QI
:
30648 case V16HI_FTYPE_V16HI
:
30649 case V16HI_FTYPE_V8HI
:
30650 case V8SI_FTYPE_V8SI
:
30651 case V16HI_FTYPE_V16QI
:
30652 case V8SI_FTYPE_V16QI
:
30653 case V4DI_FTYPE_V16QI
:
30654 case V8SI_FTYPE_V8HI
:
30655 case V4DI_FTYPE_V8HI
:
30656 case V4DI_FTYPE_V4SI
:
30657 case V4DI_FTYPE_V2DI
:
30660 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30661 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30662 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30663 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30664 case V16QI_FTYPE_V16QI_V16QI
:
30665 case V16QI_FTYPE_V8HI_V8HI
:
30666 case V8QI_FTYPE_V8QI_V8QI
:
30667 case V8QI_FTYPE_V4HI_V4HI
:
30668 case V8HI_FTYPE_V8HI_V8HI
:
30669 case V8HI_FTYPE_V16QI_V16QI
:
30670 case V8HI_FTYPE_V4SI_V4SI
:
30671 case V8SF_FTYPE_V8SF_V8SF
:
30672 case V8SF_FTYPE_V8SF_V8SI
:
30673 case V4SI_FTYPE_V4SI_V4SI
:
30674 case V4SI_FTYPE_V8HI_V8HI
:
30675 case V4SI_FTYPE_V4SF_V4SF
:
30676 case V4SI_FTYPE_V2DF_V2DF
:
30677 case V4HI_FTYPE_V4HI_V4HI
:
30678 case V4HI_FTYPE_V8QI_V8QI
:
30679 case V4HI_FTYPE_V2SI_V2SI
:
30680 case V4DF_FTYPE_V4DF_V4DF
:
30681 case V4DF_FTYPE_V4DF_V4DI
:
30682 case V4SF_FTYPE_V4SF_V4SF
:
30683 case V4SF_FTYPE_V4SF_V4SI
:
30684 case V4SF_FTYPE_V4SF_V2SI
:
30685 case V4SF_FTYPE_V4SF_V2DF
:
30686 case V4SF_FTYPE_V4SF_DI
:
30687 case V4SF_FTYPE_V4SF_SI
:
30688 case V2DI_FTYPE_V2DI_V2DI
:
30689 case V2DI_FTYPE_V16QI_V16QI
:
30690 case V2DI_FTYPE_V4SI_V4SI
:
30691 case V2UDI_FTYPE_V4USI_V4USI
:
30692 case V2DI_FTYPE_V2DI_V16QI
:
30693 case V2DI_FTYPE_V2DF_V2DF
:
30694 case V2SI_FTYPE_V2SI_V2SI
:
30695 case V2SI_FTYPE_V4HI_V4HI
:
30696 case V2SI_FTYPE_V2SF_V2SF
:
30697 case V2DF_FTYPE_V2DF_V2DF
:
30698 case V2DF_FTYPE_V2DF_V4SF
:
30699 case V2DF_FTYPE_V2DF_V2DI
:
30700 case V2DF_FTYPE_V2DF_DI
:
30701 case V2DF_FTYPE_V2DF_SI
:
30702 case V2SF_FTYPE_V2SF_V2SF
:
30703 case V1DI_FTYPE_V1DI_V1DI
:
30704 case V1DI_FTYPE_V8QI_V8QI
:
30705 case V1DI_FTYPE_V2SI_V2SI
:
30706 case V32QI_FTYPE_V16HI_V16HI
:
30707 case V16HI_FTYPE_V8SI_V8SI
:
30708 case V32QI_FTYPE_V32QI_V32QI
:
30709 case V16HI_FTYPE_V32QI_V32QI
:
30710 case V16HI_FTYPE_V16HI_V16HI
:
30711 case V8SI_FTYPE_V4DF_V4DF
:
30712 case V8SI_FTYPE_V8SI_V8SI
:
30713 case V8SI_FTYPE_V16HI_V16HI
:
30714 case V4DI_FTYPE_V4DI_V4DI
:
30715 case V4DI_FTYPE_V8SI_V8SI
:
30716 case V4UDI_FTYPE_V8USI_V8USI
:
30717 if (comparison
== UNKNOWN
)
30718 return ix86_expand_binop_builtin (icode
, exp
, target
);
30721 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30722 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30723 gcc_assert (comparison
!= UNKNOWN
);
30727 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30728 case V16HI_FTYPE_V16HI_SI_COUNT
:
30729 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30730 case V8SI_FTYPE_V8SI_SI_COUNT
:
30731 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30732 case V4DI_FTYPE_V4DI_INT_COUNT
:
30733 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30734 case V8HI_FTYPE_V8HI_SI_COUNT
:
30735 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30736 case V4SI_FTYPE_V4SI_SI_COUNT
:
30737 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30738 case V4HI_FTYPE_V4HI_SI_COUNT
:
30739 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30740 case V2DI_FTYPE_V2DI_SI_COUNT
:
30741 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30742 case V2SI_FTYPE_V2SI_SI_COUNT
:
30743 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30744 case V1DI_FTYPE_V1DI_SI_COUNT
:
30746 last_arg_count
= true;
30748 case UINT64_FTYPE_UINT64_UINT64
:
30749 case UINT_FTYPE_UINT_UINT
:
30750 case UINT_FTYPE_UINT_USHORT
:
30751 case UINT_FTYPE_UINT_UCHAR
:
30752 case UINT16_FTYPE_UINT16_INT
:
30753 case UINT8_FTYPE_UINT8_INT
:
30756 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30759 nargs_constant
= 1;
30761 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30764 nargs_constant
= 1;
30766 case V8HI_FTYPE_V8HI_INT
:
30767 case V8HI_FTYPE_V8SF_INT
:
30768 case V8HI_FTYPE_V4SF_INT
:
30769 case V8SF_FTYPE_V8SF_INT
:
30770 case V4SI_FTYPE_V4SI_INT
:
30771 case V4SI_FTYPE_V8SI_INT
:
30772 case V4HI_FTYPE_V4HI_INT
:
30773 case V4DF_FTYPE_V4DF_INT
:
30774 case V4SF_FTYPE_V4SF_INT
:
30775 case V4SF_FTYPE_V8SF_INT
:
30776 case V2DI_FTYPE_V2DI_INT
:
30777 case V2DF_FTYPE_V2DF_INT
:
30778 case V2DF_FTYPE_V4DF_INT
:
30779 case V16HI_FTYPE_V16HI_INT
:
30780 case V8SI_FTYPE_V8SI_INT
:
30781 case V4DI_FTYPE_V4DI_INT
:
30782 case V2DI_FTYPE_V4DI_INT
:
30784 nargs_constant
= 1;
30786 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30787 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30788 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30789 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30790 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30791 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30794 case V32QI_FTYPE_V32QI_V32QI_INT
:
30795 case V16HI_FTYPE_V16HI_V16HI_INT
:
30796 case V16QI_FTYPE_V16QI_V16QI_INT
:
30797 case V4DI_FTYPE_V4DI_V4DI_INT
:
30798 case V8HI_FTYPE_V8HI_V8HI_INT
:
30799 case V8SI_FTYPE_V8SI_V8SI_INT
:
30800 case V8SI_FTYPE_V8SI_V4SI_INT
:
30801 case V8SF_FTYPE_V8SF_V8SF_INT
:
30802 case V8SF_FTYPE_V8SF_V4SF_INT
:
30803 case V4SI_FTYPE_V4SI_V4SI_INT
:
30804 case V4DF_FTYPE_V4DF_V4DF_INT
:
30805 case V4DF_FTYPE_V4DF_V2DF_INT
:
30806 case V4SF_FTYPE_V4SF_V4SF_INT
:
30807 case V2DI_FTYPE_V2DI_V2DI_INT
:
30808 case V4DI_FTYPE_V4DI_V2DI_INT
:
30809 case V2DF_FTYPE_V2DF_V2DF_INT
:
30811 nargs_constant
= 1;
30813 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
30816 nargs_constant
= 1;
30818 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
30821 nargs_constant
= 1;
30823 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
30826 nargs_constant
= 1;
30828 case V2DI_FTYPE_V2DI_UINT_UINT
:
30830 nargs_constant
= 2;
30832 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
30833 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
30834 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
30835 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
30837 nargs_constant
= 1;
30839 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
30841 nargs_constant
= 2;
30843 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
30844 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
30848 gcc_unreachable ();
30851 gcc_assert (nargs
<= ARRAY_SIZE (args
));
30853 if (comparison
!= UNKNOWN
)
30855 gcc_assert (nargs
== 2);
30856 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
30859 if (rmode
== VOIDmode
|| rmode
== tmode
)
30863 || GET_MODE (target
) != tmode
30864 || !insn_p
->operand
[0].predicate (target
, tmode
))
30865 target
= gen_reg_rtx (tmode
);
30866 real_target
= target
;
30870 target
= gen_reg_rtx (rmode
);
30871 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
30874 for (i
= 0; i
< nargs
; i
++)
30876 tree arg
= CALL_EXPR_ARG (exp
, i
);
30877 rtx op
= expand_normal (arg
);
30878 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
30879 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
30881 if (last_arg_count
&& (i
+ 1) == nargs
)
30883 /* SIMD shift insns take either an 8-bit immediate or
30884 register as count. But builtin functions take int as
30885 count. If count doesn't match, we put it in register. */
30888 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
30889 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
30890 op
= copy_to_reg (op
);
30893 else if ((nargs
- i
) <= nargs_constant
)
30898 case CODE_FOR_avx2_inserti128
:
30899 case CODE_FOR_avx2_extracti128
:
30900 error ("the last argument must be an 1-bit immediate");
30903 case CODE_FOR_sse4_1_roundsd
:
30904 case CODE_FOR_sse4_1_roundss
:
30906 case CODE_FOR_sse4_1_roundpd
:
30907 case CODE_FOR_sse4_1_roundps
:
30908 case CODE_FOR_avx_roundpd256
:
30909 case CODE_FOR_avx_roundps256
:
30911 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
30912 case CODE_FOR_sse4_1_roundps_sfix
:
30913 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
30914 case CODE_FOR_avx_roundps_sfix256
:
30916 case CODE_FOR_sse4_1_blendps
:
30917 case CODE_FOR_avx_blendpd256
:
30918 case CODE_FOR_avx_vpermilv4df
:
30919 error ("the last argument must be a 4-bit immediate");
30922 case CODE_FOR_sse4_1_blendpd
:
30923 case CODE_FOR_avx_vpermilv2df
:
30924 case CODE_FOR_xop_vpermil2v2df3
:
30925 case CODE_FOR_xop_vpermil2v4sf3
:
30926 case CODE_FOR_xop_vpermil2v4df3
:
30927 case CODE_FOR_xop_vpermil2v8sf3
:
30928 error ("the last argument must be a 2-bit immediate");
30931 case CODE_FOR_avx_vextractf128v4df
:
30932 case CODE_FOR_avx_vextractf128v8sf
:
30933 case CODE_FOR_avx_vextractf128v8si
:
30934 case CODE_FOR_avx_vinsertf128v4df
:
30935 case CODE_FOR_avx_vinsertf128v8sf
:
30936 case CODE_FOR_avx_vinsertf128v8si
:
30937 error ("the last argument must be a 1-bit immediate");
30940 case CODE_FOR_avx_vmcmpv2df3
:
30941 case CODE_FOR_avx_vmcmpv4sf3
:
30942 case CODE_FOR_avx_cmpv2df3
:
30943 case CODE_FOR_avx_cmpv4sf3
:
30944 case CODE_FOR_avx_cmpv4df3
:
30945 case CODE_FOR_avx_cmpv8sf3
:
30946 error ("the last argument must be a 5-bit immediate");
30950 switch (nargs_constant
)
30953 if ((nargs
- i
) == nargs_constant
)
30955 error ("the next to last argument must be an 8-bit immediate");
30959 error ("the last argument must be an 8-bit immediate");
30962 gcc_unreachable ();
30969 if (VECTOR_MODE_P (mode
))
30970 op
= safe_vector_operand (op
, mode
);
30972 /* If we aren't optimizing, only allow one memory operand to
30974 if (memory_operand (op
, mode
))
30977 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
30979 if (optimize
|| !match
|| num_memory
> 1)
30980 op
= copy_to_mode_reg (mode
, op
);
30984 op
= copy_to_reg (op
);
30985 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
30990 args
[i
].mode
= mode
;
30996 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
30999 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31002 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31006 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31007 args
[2].op
, args
[3].op
);
31010 gcc_unreachable ();
31020 /* Subroutine of ix86_expand_builtin to take care of special insns
31021 with variable number of operands. */
31024 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31025 tree exp
, rtx target
)
31029 unsigned int i
, nargs
, arg_adjust
, memory
;
31033 enum machine_mode mode
;
31035 enum insn_code icode
= d
->icode
;
31036 bool last_arg_constant
= false;
31037 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31038 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31039 enum { load
, store
} klass
;
31041 switch ((enum ix86_builtin_func_type
) d
->flag
)
31043 case VOID_FTYPE_VOID
:
31044 emit_insn (GEN_FCN (icode
) (target
));
31046 case VOID_FTYPE_UINT64
:
31047 case VOID_FTYPE_UNSIGNED
:
31053 case INT_FTYPE_VOID
:
31054 case UINT64_FTYPE_VOID
:
31055 case UNSIGNED_FTYPE_VOID
:
31060 case UINT64_FTYPE_PUNSIGNED
:
31061 case V2DI_FTYPE_PV2DI
:
31062 case V4DI_FTYPE_PV4DI
:
31063 case V32QI_FTYPE_PCCHAR
:
31064 case V16QI_FTYPE_PCCHAR
:
31065 case V8SF_FTYPE_PCV4SF
:
31066 case V8SF_FTYPE_PCFLOAT
:
31067 case V4SF_FTYPE_PCFLOAT
:
31068 case V4DF_FTYPE_PCV2DF
:
31069 case V4DF_FTYPE_PCDOUBLE
:
31070 case V2DF_FTYPE_PCDOUBLE
:
31071 case VOID_FTYPE_PVOID
:
31076 case VOID_FTYPE_PV2SF_V4SF
:
31077 case VOID_FTYPE_PV4DI_V4DI
:
31078 case VOID_FTYPE_PV2DI_V2DI
:
31079 case VOID_FTYPE_PCHAR_V32QI
:
31080 case VOID_FTYPE_PCHAR_V16QI
:
31081 case VOID_FTYPE_PFLOAT_V8SF
:
31082 case VOID_FTYPE_PFLOAT_V4SF
:
31083 case VOID_FTYPE_PDOUBLE_V4DF
:
31084 case VOID_FTYPE_PDOUBLE_V2DF
:
31085 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31086 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31087 case VOID_FTYPE_PINT_INT
:
31090 /* Reserve memory operand for target. */
31091 memory
= ARRAY_SIZE (args
);
31093 case V4SF_FTYPE_V4SF_PCV2SF
:
31094 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31099 case V8SF_FTYPE_PCV8SF_V8SI
:
31100 case V4DF_FTYPE_PCV4DF_V4DI
:
31101 case V4SF_FTYPE_PCV4SF_V4SI
:
31102 case V2DF_FTYPE_PCV2DF_V2DI
:
31103 case V8SI_FTYPE_PCV8SI_V8SI
:
31104 case V4DI_FTYPE_PCV4DI_V4DI
:
31105 case V4SI_FTYPE_PCV4SI_V4SI
:
31106 case V2DI_FTYPE_PCV2DI_V2DI
:
31111 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31112 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31113 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31114 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31115 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31116 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31117 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31118 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31121 /* Reserve memory operand for target. */
31122 memory
= ARRAY_SIZE (args
);
31124 case VOID_FTYPE_UINT_UINT_UINT
:
31125 case VOID_FTYPE_UINT64_UINT_UINT
:
31126 case UCHAR_FTYPE_UINT_UINT_UINT
:
31127 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31130 memory
= ARRAY_SIZE (args
);
31131 last_arg_constant
= true;
31134 gcc_unreachable ();
31137 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31139 if (klass
== store
)
31141 arg
= CALL_EXPR_ARG (exp
, 0);
31142 op
= expand_normal (arg
);
31143 gcc_assert (target
== 0);
31146 if (GET_MODE (op
) != Pmode
)
31147 op
= convert_to_mode (Pmode
, op
, 1);
31148 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
31151 target
= force_reg (tmode
, op
);
31159 || !register_operand (target
, tmode
)
31160 || GET_MODE (target
) != tmode
)
31161 target
= gen_reg_rtx (tmode
);
31164 for (i
= 0; i
< nargs
; i
++)
31166 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31169 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31170 op
= expand_normal (arg
);
31171 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31173 if (last_arg_constant
&& (i
+ 1) == nargs
)
31177 if (icode
== CODE_FOR_lwp_lwpvalsi3
31178 || icode
== CODE_FOR_lwp_lwpinssi3
31179 || icode
== CODE_FOR_lwp_lwpvaldi3
31180 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31181 error ("the last argument must be a 32-bit immediate");
31183 error ("the last argument must be an 8-bit immediate");
31191 /* This must be the memory operand. */
31192 if (GET_MODE (op
) != Pmode
)
31193 op
= convert_to_mode (Pmode
, op
, 1);
31194 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
31195 gcc_assert (GET_MODE (op
) == mode
31196 || GET_MODE (op
) == VOIDmode
);
31200 /* This must be register. */
31201 if (VECTOR_MODE_P (mode
))
31202 op
= safe_vector_operand (op
, mode
);
31204 gcc_assert (GET_MODE (op
) == mode
31205 || GET_MODE (op
) == VOIDmode
);
31206 op
= copy_to_mode_reg (mode
, op
);
31211 args
[i
].mode
= mode
;
31217 pat
= GEN_FCN (icode
) (target
);
31220 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31223 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31226 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31229 gcc_unreachable ();
31235 return klass
== store
? 0 : target
;
31238 /* Return the integer constant in ARG. Constrain it to be in the range
31239 of the subparts of VEC_TYPE; issue an error if not. */
31242 get_element_number (tree vec_type
, tree arg
)
31244 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31246 if (!host_integerp (arg
, 1)
31247 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31249 error ("selector must be an integer constant in the range 0..%wi", max
);
31256 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31257 ix86_expand_vector_init. We DO have language-level syntax for this, in
31258 the form of (type){ init-list }. Except that since we can't place emms
31259 instructions from inside the compiler, we can't allow the use of MMX
31260 registers unless the user explicitly asks for it. So we do *not* define
31261 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31262 we have builtins invoked by mmintrin.h that gives us license to emit
31263 these sorts of instructions. */
31266 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31268 enum machine_mode tmode
= TYPE_MODE (type
);
31269 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31270 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31271 rtvec v
= rtvec_alloc (n_elt
);
31273 gcc_assert (VECTOR_MODE_P (tmode
));
31274 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31276 for (i
= 0; i
< n_elt
; ++i
)
31278 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31279 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31282 if (!target
|| !register_operand (target
, tmode
))
31283 target
= gen_reg_rtx (tmode
);
31285 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31289 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31290 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31291 had a language-level syntax for referencing vector elements. */
/* NOTE(review): extraction artifact — the `static rtx` declarator, the local
   declarations for arg0/arg1/op0/elt, the braces, and the trailing
   `return target;` are missing from this chunk; the remaining lines are
   preserved byte-for-byte.  */
31294 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31296 enum machine_mode tmode
, mode0
;
/* Arg 0 is the source vector, arg 1 the (constant) element selector.  */
31301 arg0
= CALL_EXPR_ARG (exp
, 0);
31302 arg1
= CALL_EXPR_ARG (exp
, 1);
31304 op0
= expand_normal (arg0
);
/* get_element_number also diagnoses an out-of-range selector.  */
31305 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* TMODE is the element mode of the vector, MODE0 the vector mode itself.  */
31307 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31308 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31309 gcc_assert (VECTOR_MODE_P (mode0
));
31311 op0
= force_reg (mode0
, op0
);
/* Need a fresh register when optimizing or when TARGET is unusable.  */
31313 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31314 target
= gen_reg_rtx (tmode
);
/* First operand `true` permits MMX modes (see header comment).  */
31316 ix86_expand_vector_extract (true, target
, op0
, elt
);
31321 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31322 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31323 a language-level syntax for referencing vector elements. */
/* NOTE(review): extraction artifact — the `static rtx` declarator, the
   declaration of `elt`, the braces, and the trailing `return target;` are
   missing from this chunk; the remaining lines are preserved byte-for-byte.  */
31326 ix86_expand_vec_set_builtin (tree exp
)
31328 enum machine_mode tmode
, mode1
;
31329 tree arg0
, arg1
, arg2
;
31331 rtx op0
, op1
, target
;
/* Arg 0 is the vector, arg 1 the new element value, arg 2 the selector.  */
31333 arg0
= CALL_EXPR_ARG (exp
, 0);
31334 arg1
= CALL_EXPR_ARG (exp
, 1);
31335 arg2
= CALL_EXPR_ARG (exp
, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
31337 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31338 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31339 gcc_assert (VECTOR_MODE_P (tmode
));
31341 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31342 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
/* get_element_number also diagnoses an out-of-range selector.  */
31343 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Coerce the element value into MODE1 if it was expanded in another mode.  */
31345 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31346 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31348 op0
= force_reg (tmode
, op0
);
31349 op1
= force_reg (mode1
, op1
);
31351 /* OP0 is the source of these builtin functions and shouldn't be
31352 modified. Create a copy, use it and return it as target. */
31353 target
= gen_reg_rtx (tmode
);
31354 emit_move_insn (target
, op0
);
/* First operand `true` permits MMX modes (see header comment).  */
31355 ix86_expand_vector_set (true, target
, op1
, elt
);
31360 /* Expand an expression EXP that calls a built-in function,
31361 with result going to TARGET if that's convenient
31362 (and in mode MODE if that's convenient).
31363 SUBTARGET may be used as the target for computing one of EXP's operands.
31364 IGNORE is nonzero if the value is to be ignored. */
31367 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
31368 enum machine_mode mode ATTRIBUTE_UNUSED
,
31369 int ignore ATTRIBUTE_UNUSED
)
31371 const struct builtin_description
*d
;
31373 enum insn_code icode
;
31374 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
31375 tree arg0
, arg1
, arg2
, arg3
, arg4
;
31376 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
31377 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
31378 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
31380 /* For CPU builtins that can be folded, fold first and expand the fold. */
31383 case IX86_BUILTIN_CPU_INIT
:
31385 /* Make it call __cpu_indicator_init in libgcc. */
31386 tree call_expr
, fndecl
, type
;
31387 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
31388 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
31389 call_expr
= build_call_expr (fndecl
, 0);
31390 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
31392 case IX86_BUILTIN_CPU_IS
:
31393 case IX86_BUILTIN_CPU_SUPPORTS
:
31395 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31396 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
31397 gcc_assert (fold_expr
!= NULL_TREE
);
31398 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
31402 /* Determine whether the builtin function is available under the current ISA.
31403 Originally the builtin was not created if it wasn't applicable to the
31404 current ISA based on the command line switches. With function specific
31405 options, we need to check in the context of the function making the call
31406 whether it is supported. */
31407 if (ix86_builtins_isa
[fcode
].isa
31408 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
31410 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
31411 NULL
, (enum fpmath_unit
) 0, false);
31414 error ("%qE needs unknown isa option", fndecl
);
31417 gcc_assert (opts
!= NULL
);
31418 error ("%qE needs isa option %s", fndecl
, opts
);
31426 case IX86_BUILTIN_MASKMOVQ
:
31427 case IX86_BUILTIN_MASKMOVDQU
:
31428 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
31429 ? CODE_FOR_mmx_maskmovq
31430 : CODE_FOR_sse2_maskmovdqu
);
31431 /* Note the arg order is different from the operand order. */
31432 arg1
= CALL_EXPR_ARG (exp
, 0);
31433 arg2
= CALL_EXPR_ARG (exp
, 1);
31434 arg0
= CALL_EXPR_ARG (exp
, 2);
31435 op0
= expand_normal (arg0
);
31436 op1
= expand_normal (arg1
);
31437 op2
= expand_normal (arg2
);
31438 mode0
= insn_data
[icode
].operand
[0].mode
;
31439 mode1
= insn_data
[icode
].operand
[1].mode
;
31440 mode2
= insn_data
[icode
].operand
[2].mode
;
31442 if (GET_MODE (op0
) != Pmode
)
31443 op0
= convert_to_mode (Pmode
, op0
, 1);
31444 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
31446 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31447 op0
= copy_to_mode_reg (mode0
, op0
);
31448 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
31449 op1
= copy_to_mode_reg (mode1
, op1
);
31450 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
31451 op2
= copy_to_mode_reg (mode2
, op2
);
31452 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31458 case IX86_BUILTIN_LDMXCSR
:
31459 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
31460 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31461 emit_move_insn (target
, op0
);
31462 emit_insn (gen_sse_ldmxcsr (target
));
31465 case IX86_BUILTIN_STMXCSR
:
31466 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31467 emit_insn (gen_sse_stmxcsr (target
));
31468 return copy_to_mode_reg (SImode
, target
);
31470 case IX86_BUILTIN_CLFLUSH
:
31471 arg0
= CALL_EXPR_ARG (exp
, 0);
31472 op0
= expand_normal (arg0
);
31473 icode
= CODE_FOR_sse2_clflush
;
31474 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31476 if (GET_MODE (op0
) != Pmode
)
31477 op0
= convert_to_mode (Pmode
, op0
, 1);
31478 op0
= force_reg (Pmode
, op0
);
31481 emit_insn (gen_sse2_clflush (op0
));
31484 case IX86_BUILTIN_MONITOR
:
31485 arg0
= CALL_EXPR_ARG (exp
, 0);
31486 arg1
= CALL_EXPR_ARG (exp
, 1);
31487 arg2
= CALL_EXPR_ARG (exp
, 2);
31488 op0
= expand_normal (arg0
);
31489 op1
= expand_normal (arg1
);
31490 op2
= expand_normal (arg2
);
31493 if (GET_MODE (op0
) != Pmode
)
31494 op0
= convert_to_mode (Pmode
, op0
, 1);
31495 op0
= force_reg (Pmode
, op0
);
31498 op1
= copy_to_mode_reg (SImode
, op1
);
31500 op2
= copy_to_mode_reg (SImode
, op2
);
31501 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
31504 case IX86_BUILTIN_MWAIT
:
31505 arg0
= CALL_EXPR_ARG (exp
, 0);
31506 arg1
= CALL_EXPR_ARG (exp
, 1);
31507 op0
= expand_normal (arg0
);
31508 op1
= expand_normal (arg1
);
31510 op0
= copy_to_mode_reg (SImode
, op0
);
31512 op1
= copy_to_mode_reg (SImode
, op1
);
31513 emit_insn (gen_sse3_mwait (op0
, op1
));
31516 case IX86_BUILTIN_VEC_INIT_V2SI
:
31517 case IX86_BUILTIN_VEC_INIT_V4HI
:
31518 case IX86_BUILTIN_VEC_INIT_V8QI
:
31519 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
31521 case IX86_BUILTIN_VEC_EXT_V2DF
:
31522 case IX86_BUILTIN_VEC_EXT_V2DI
:
31523 case IX86_BUILTIN_VEC_EXT_V4SF
:
31524 case IX86_BUILTIN_VEC_EXT_V4SI
:
31525 case IX86_BUILTIN_VEC_EXT_V8HI
:
31526 case IX86_BUILTIN_VEC_EXT_V2SI
:
31527 case IX86_BUILTIN_VEC_EXT_V4HI
:
31528 case IX86_BUILTIN_VEC_EXT_V16QI
:
31529 return ix86_expand_vec_ext_builtin (exp
, target
);
31531 case IX86_BUILTIN_VEC_SET_V2DI
:
31532 case IX86_BUILTIN_VEC_SET_V4SF
:
31533 case IX86_BUILTIN_VEC_SET_V4SI
:
31534 case IX86_BUILTIN_VEC_SET_V8HI
:
31535 case IX86_BUILTIN_VEC_SET_V4HI
:
31536 case IX86_BUILTIN_VEC_SET_V16QI
:
31537 return ix86_expand_vec_set_builtin (exp
);
31539 case IX86_BUILTIN_INFQ
:
31540 case IX86_BUILTIN_HUGE_VALQ
:
31542 REAL_VALUE_TYPE inf
;
31546 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
31548 tmp
= validize_mem (force_const_mem (mode
, tmp
));
31551 target
= gen_reg_rtx (mode
);
31553 emit_move_insn (target
, tmp
);
31557 case IX86_BUILTIN_RDPMC
:
31558 case IX86_BUILTIN_RDTSC
:
31559 case IX86_BUILTIN_RDTSCP
:
31561 op0
= gen_reg_rtx (DImode
);
31562 op1
= gen_reg_rtx (DImode
);
31564 if (fcode
== IX86_BUILTIN_RDPMC
)
31566 arg0
= CALL_EXPR_ARG (exp
, 0);
31567 op2
= expand_normal (arg0
);
31568 if (!register_operand (op2
, SImode
))
31569 op2
= copy_to_mode_reg (SImode
, op2
);
31571 insn
= (TARGET_64BIT
31572 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
31573 : gen_rdpmc (op0
, op2
));
31576 else if (fcode
== IX86_BUILTIN_RDTSC
)
31578 insn
= (TARGET_64BIT
31579 ? gen_rdtsc_rex64 (op0
, op1
)
31580 : gen_rdtsc (op0
));
31585 op2
= gen_reg_rtx (SImode
);
31587 insn
= (TARGET_64BIT
31588 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
31589 : gen_rdtscp (op0
, op2
));
31592 arg0
= CALL_EXPR_ARG (exp
, 0);
31593 op4
= expand_normal (arg0
);
31594 if (!address_operand (op4
, VOIDmode
))
31596 op4
= convert_memory_address (Pmode
, op4
);
31597 op4
= copy_addr_to_reg (op4
);
31599 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
31603 target
= gen_reg_rtx (mode
);
31607 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
31608 op1
, 1, OPTAB_DIRECT
);
31609 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
31610 op0
, 1, OPTAB_DIRECT
);
31613 emit_move_insn (target
, op0
);
31616 case IX86_BUILTIN_FXSAVE
:
31617 case IX86_BUILTIN_FXRSTOR
:
31618 case IX86_BUILTIN_FXSAVE64
:
31619 case IX86_BUILTIN_FXRSTOR64
:
31622 case IX86_BUILTIN_FXSAVE
:
31623 icode
= CODE_FOR_fxsave
;
31625 case IX86_BUILTIN_FXRSTOR
:
31626 icode
= CODE_FOR_fxrstor
;
31628 case IX86_BUILTIN_FXSAVE64
:
31629 icode
= CODE_FOR_fxsave64
;
31631 case IX86_BUILTIN_FXRSTOR64
:
31632 icode
= CODE_FOR_fxrstor64
;
31635 gcc_unreachable ();
31638 arg0
= CALL_EXPR_ARG (exp
, 0);
31639 op0
= expand_normal (arg0
);
31641 if (!address_operand (op0
, VOIDmode
))
31643 op0
= convert_memory_address (Pmode
, op0
);
31644 op0
= copy_addr_to_reg (op0
);
31646 op0
= gen_rtx_MEM (BLKmode
, op0
);
31648 pat
= GEN_FCN (icode
) (op0
);
31653 case IX86_BUILTIN_XSAVE
:
31654 case IX86_BUILTIN_XRSTOR
:
31655 case IX86_BUILTIN_XSAVE64
:
31656 case IX86_BUILTIN_XRSTOR64
:
31657 case IX86_BUILTIN_XSAVEOPT
:
31658 case IX86_BUILTIN_XSAVEOPT64
:
31659 arg0
= CALL_EXPR_ARG (exp
, 0);
31660 arg1
= CALL_EXPR_ARG (exp
, 1);
31661 op0
= expand_normal (arg0
);
31662 op1
= expand_normal (arg1
);
31664 if (!address_operand (op0
, VOIDmode
))
31666 op0
= convert_memory_address (Pmode
, op0
);
31667 op0
= copy_addr_to_reg (op0
);
31669 op0
= gen_rtx_MEM (BLKmode
, op0
);
31671 op1
= force_reg (DImode
, op1
);
31675 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
31676 NULL
, 1, OPTAB_DIRECT
);
31679 case IX86_BUILTIN_XSAVE
:
31680 icode
= CODE_FOR_xsave_rex64
;
31682 case IX86_BUILTIN_XRSTOR
:
31683 icode
= CODE_FOR_xrstor_rex64
;
31685 case IX86_BUILTIN_XSAVE64
:
31686 icode
= CODE_FOR_xsave64
;
31688 case IX86_BUILTIN_XRSTOR64
:
31689 icode
= CODE_FOR_xrstor64
;
31691 case IX86_BUILTIN_XSAVEOPT
:
31692 icode
= CODE_FOR_xsaveopt_rex64
;
31694 case IX86_BUILTIN_XSAVEOPT64
:
31695 icode
= CODE_FOR_xsaveopt64
;
31698 gcc_unreachable ();
31701 op2
= gen_lowpart (SImode
, op2
);
31702 op1
= gen_lowpart (SImode
, op1
);
31703 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31709 case IX86_BUILTIN_XSAVE
:
31710 icode
= CODE_FOR_xsave
;
31712 case IX86_BUILTIN_XRSTOR
:
31713 icode
= CODE_FOR_xrstor
;
31715 case IX86_BUILTIN_XSAVEOPT
:
31716 icode
= CODE_FOR_xsaveopt
;
31719 gcc_unreachable ();
31721 pat
= GEN_FCN (icode
) (op0
, op1
);
31728 case IX86_BUILTIN_LLWPCB
:
31729 arg0
= CALL_EXPR_ARG (exp
, 0);
31730 op0
= expand_normal (arg0
);
31731 icode
= CODE_FOR_lwp_llwpcb
;
31732 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31734 if (GET_MODE (op0
) != Pmode
)
31735 op0
= convert_to_mode (Pmode
, op0
, 1);
31736 op0
= force_reg (Pmode
, op0
);
31738 emit_insn (gen_lwp_llwpcb (op0
));
31741 case IX86_BUILTIN_SLWPCB
:
31742 icode
= CODE_FOR_lwp_slwpcb
;
31744 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
31745 target
= gen_reg_rtx (Pmode
);
31746 emit_insn (gen_lwp_slwpcb (target
));
31749 case IX86_BUILTIN_BEXTRI32
:
31750 case IX86_BUILTIN_BEXTRI64
:
31751 arg0
= CALL_EXPR_ARG (exp
, 0);
31752 arg1
= CALL_EXPR_ARG (exp
, 1);
31753 op0
= expand_normal (arg0
);
31754 op1
= expand_normal (arg1
);
31755 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
31756 ? CODE_FOR_tbm_bextri_si
31757 : CODE_FOR_tbm_bextri_di
);
31758 if (!CONST_INT_P (op1
))
31760 error ("last argument must be an immediate");
31765 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
31766 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
31767 op1
= GEN_INT (length
);
31768 op2
= GEN_INT (lsb_index
);
31769 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
31775 case IX86_BUILTIN_RDRAND16_STEP
:
31776 icode
= CODE_FOR_rdrandhi_1
;
31780 case IX86_BUILTIN_RDRAND32_STEP
:
31781 icode
= CODE_FOR_rdrandsi_1
;
31785 case IX86_BUILTIN_RDRAND64_STEP
:
31786 icode
= CODE_FOR_rdranddi_1
;
31790 op0
= gen_reg_rtx (mode0
);
31791 emit_insn (GEN_FCN (icode
) (op0
));
31793 arg0
= CALL_EXPR_ARG (exp
, 0);
31794 op1
= expand_normal (arg0
);
31795 if (!address_operand (op1
, VOIDmode
))
31797 op1
= convert_memory_address (Pmode
, op1
);
31798 op1
= copy_addr_to_reg (op1
);
31800 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31802 op1
= gen_reg_rtx (SImode
);
31803 emit_move_insn (op1
, CONST1_RTX (SImode
));
31805 /* Emit SImode conditional move. */
31806 if (mode0
== HImode
)
31808 op2
= gen_reg_rtx (SImode
);
31809 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
31811 else if (mode0
== SImode
)
31814 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
31817 target
= gen_reg_rtx (SImode
);
31819 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
31821 emit_insn (gen_rtx_SET (VOIDmode
, target
,
31822 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
31825 case IX86_BUILTIN_RDSEED16_STEP
:
31826 icode
= CODE_FOR_rdseedhi_1
;
31830 case IX86_BUILTIN_RDSEED32_STEP
:
31831 icode
= CODE_FOR_rdseedsi_1
;
31835 case IX86_BUILTIN_RDSEED64_STEP
:
31836 icode
= CODE_FOR_rdseeddi_1
;
31840 op0
= gen_reg_rtx (mode0
);
31841 emit_insn (GEN_FCN (icode
) (op0
));
31843 arg0
= CALL_EXPR_ARG (exp
, 0);
31844 op1
= expand_normal (arg0
);
31845 if (!address_operand (op1
, VOIDmode
))
31847 op1
= convert_memory_address (Pmode
, op1
);
31848 op1
= copy_addr_to_reg (op1
);
31850 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31852 op2
= gen_reg_rtx (QImode
);
31854 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
31856 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
31859 target
= gen_reg_rtx (SImode
);
31861 emit_insn (gen_zero_extendqisi2 (target
, op2
));
31864 case IX86_BUILTIN_ADDCARRYX32
:
31865 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
31869 case IX86_BUILTIN_ADDCARRYX64
:
31870 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
31874 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
31875 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
31876 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
31877 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
31879 op0
= gen_reg_rtx (QImode
);
31881 /* Generate CF from input operand. */
31882 op1
= expand_normal (arg0
);
31883 if (GET_MODE (op1
) != QImode
)
31884 op1
= convert_to_mode (QImode
, op1
, 1);
31885 op1
= copy_to_mode_reg (QImode
, op1
);
31886 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
31888 /* Gen ADCX instruction to compute X+Y+CF. */
31889 op2
= expand_normal (arg1
);
31890 op3
= expand_normal (arg2
);
31893 op2
= copy_to_mode_reg (mode0
, op2
);
31895 op3
= copy_to_mode_reg (mode0
, op3
);
31897 op0
= gen_reg_rtx (mode0
);
31899 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
31900 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
31901 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
31903 /* Store the result. */
31904 op4
= expand_normal (arg3
);
31905 if (!address_operand (op4
, VOIDmode
))
31907 op4
= convert_memory_address (Pmode
, op4
);
31908 op4
= copy_addr_to_reg (op4
);
31910 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
31912 /* Return current CF value. */
31914 target
= gen_reg_rtx (QImode
);
31916 PUT_MODE (pat
, QImode
);
31917 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
31920 case IX86_BUILTIN_GATHERSIV2DF
:
31921 icode
= CODE_FOR_avx2_gathersiv2df
;
31923 case IX86_BUILTIN_GATHERSIV4DF
:
31924 icode
= CODE_FOR_avx2_gathersiv4df
;
31926 case IX86_BUILTIN_GATHERDIV2DF
:
31927 icode
= CODE_FOR_avx2_gatherdiv2df
;
31929 case IX86_BUILTIN_GATHERDIV4DF
:
31930 icode
= CODE_FOR_avx2_gatherdiv4df
;
31932 case IX86_BUILTIN_GATHERSIV4SF
:
31933 icode
= CODE_FOR_avx2_gathersiv4sf
;
31935 case IX86_BUILTIN_GATHERSIV8SF
:
31936 icode
= CODE_FOR_avx2_gathersiv8sf
;
31938 case IX86_BUILTIN_GATHERDIV4SF
:
31939 icode
= CODE_FOR_avx2_gatherdiv4sf
;
31941 case IX86_BUILTIN_GATHERDIV8SF
:
31942 icode
= CODE_FOR_avx2_gatherdiv8sf
;
31944 case IX86_BUILTIN_GATHERSIV2DI
:
31945 icode
= CODE_FOR_avx2_gathersiv2di
;
31947 case IX86_BUILTIN_GATHERSIV4DI
:
31948 icode
= CODE_FOR_avx2_gathersiv4di
;
31950 case IX86_BUILTIN_GATHERDIV2DI
:
31951 icode
= CODE_FOR_avx2_gatherdiv2di
;
31953 case IX86_BUILTIN_GATHERDIV4DI
:
31954 icode
= CODE_FOR_avx2_gatherdiv4di
;
31956 case IX86_BUILTIN_GATHERSIV4SI
:
31957 icode
= CODE_FOR_avx2_gathersiv4si
;
31959 case IX86_BUILTIN_GATHERSIV8SI
:
31960 icode
= CODE_FOR_avx2_gathersiv8si
;
31962 case IX86_BUILTIN_GATHERDIV4SI
:
31963 icode
= CODE_FOR_avx2_gatherdiv4si
;
31965 case IX86_BUILTIN_GATHERDIV8SI
:
31966 icode
= CODE_FOR_avx2_gatherdiv8si
;
31968 case IX86_BUILTIN_GATHERALTSIV4DF
:
31969 icode
= CODE_FOR_avx2_gathersiv4df
;
31971 case IX86_BUILTIN_GATHERALTDIV8SF
:
31972 icode
= CODE_FOR_avx2_gatherdiv8sf
;
31974 case IX86_BUILTIN_GATHERALTSIV4DI
:
31975 icode
= CODE_FOR_avx2_gathersiv4di
;
31977 case IX86_BUILTIN_GATHERALTDIV8SI
:
31978 icode
= CODE_FOR_avx2_gatherdiv8si
;
31982 arg0
= CALL_EXPR_ARG (exp
, 0);
31983 arg1
= CALL_EXPR_ARG (exp
, 1);
31984 arg2
= CALL_EXPR_ARG (exp
, 2);
31985 arg3
= CALL_EXPR_ARG (exp
, 3);
31986 arg4
= CALL_EXPR_ARG (exp
, 4);
31987 op0
= expand_normal (arg0
);
31988 op1
= expand_normal (arg1
);
31989 op2
= expand_normal (arg2
);
31990 op3
= expand_normal (arg3
);
31991 op4
= expand_normal (arg4
);
31992 /* Note the arg order is different from the operand order. */
31993 mode0
= insn_data
[icode
].operand
[1].mode
;
31994 mode2
= insn_data
[icode
].operand
[3].mode
;
31995 mode3
= insn_data
[icode
].operand
[4].mode
;
31996 mode4
= insn_data
[icode
].operand
[5].mode
;
31998 if (target
== NULL_RTX
31999 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32000 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32002 subtarget
= target
;
32004 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32005 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32007 rtx half
= gen_reg_rtx (V4SImode
);
32008 if (!nonimmediate_operand (op2
, V8SImode
))
32009 op2
= copy_to_mode_reg (V8SImode
, op2
);
32010 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32013 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32014 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32016 rtx (*gen
) (rtx
, rtx
);
32017 rtx half
= gen_reg_rtx (mode0
);
32018 if (mode0
== V4SFmode
)
32019 gen
= gen_vec_extract_lo_v8sf
;
32021 gen
= gen_vec_extract_lo_v8si
;
32022 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32023 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32024 emit_insn (gen (half
, op0
));
32026 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32027 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32028 emit_insn (gen (half
, op3
));
32032 /* Force memory operand only with base register here. But we
32033 don't want to do it on memory operand for other builtin
32035 if (GET_MODE (op1
) != Pmode
)
32036 op1
= convert_to_mode (Pmode
, op1
, 1);
32037 op1
= force_reg (Pmode
, op1
);
32039 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32040 op0
= copy_to_mode_reg (mode0
, op0
);
32041 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32042 op1
= copy_to_mode_reg (Pmode
, op1
);
32043 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32044 op2
= copy_to_mode_reg (mode2
, op2
);
32045 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32046 op3
= copy_to_mode_reg (mode3
, op3
);
32047 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32049 error ("last argument must be scale 1, 2, 4, 8");
32053 /* Optimize. If mask is known to have all high bits set,
32054 replace op0 with pc_rtx to signal that the instruction
32055 overwrites the whole destination and doesn't use its
32056 previous contents. */
32059 if (TREE_CODE (arg3
) == VECTOR_CST
)
32061 unsigned int negative
= 0;
32062 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32064 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32065 if (TREE_CODE (cst
) == INTEGER_CST
32066 && tree_int_cst_sign_bit (cst
))
32068 else if (TREE_CODE (cst
) == REAL_CST
32069 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32072 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32075 else if (TREE_CODE (arg3
) == SSA_NAME
)
32077 /* Recognize also when mask is like:
32078 __v2df src = _mm_setzero_pd ();
32079 __v2df mask = _mm_cmpeq_pd (src, src);
32081 __v8sf src = _mm256_setzero_ps ();
32082 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32083 as that is a cheaper way to load all ones into
32084 a register than having to load a constant from
32086 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32087 if (is_gimple_call (def_stmt
))
32089 tree fndecl
= gimple_call_fndecl (def_stmt
);
32091 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32092 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32094 case IX86_BUILTIN_CMPPD
:
32095 case IX86_BUILTIN_CMPPS
:
32096 case IX86_BUILTIN_CMPPD256
:
32097 case IX86_BUILTIN_CMPPS256
:
32098 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32101 case IX86_BUILTIN_CMPEQPD
:
32102 case IX86_BUILTIN_CMPEQPS
:
32103 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32104 && initializer_zerop (gimple_call_arg (def_stmt
,
32115 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32120 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32121 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32123 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32124 ? V4SFmode
: V4SImode
;
32125 if (target
== NULL_RTX
)
32126 target
= gen_reg_rtx (tmode
);
32127 if (tmode
== V4SFmode
)
32128 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32130 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32133 target
= subtarget
;
32137 case IX86_BUILTIN_XABORT
:
32138 icode
= CODE_FOR_xabort
;
32139 arg0
= CALL_EXPR_ARG (exp
, 0);
32140 op0
= expand_normal (arg0
);
32141 mode0
= insn_data
[icode
].operand
[0].mode
;
32142 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32144 error ("the xabort's argument must be an 8-bit immediate");
32147 emit_insn (gen_xabort (op0
));
32154 for (i
= 0, d
= bdesc_special_args
;
32155 i
< ARRAY_SIZE (bdesc_special_args
);
32157 if (d
->code
== fcode
)
32158 return ix86_expand_special_args_builtin (d
, exp
, target
);
32160 for (i
= 0, d
= bdesc_args
;
32161 i
< ARRAY_SIZE (bdesc_args
);
32163 if (d
->code
== fcode
)
32166 case IX86_BUILTIN_FABSQ
:
32167 case IX86_BUILTIN_COPYSIGNQ
:
32169 /* Emit a normal call if SSE isn't available. */
32170 return expand_call (exp
, target
, ignore
);
32172 return ix86_expand_args_builtin (d
, exp
, target
);
32175 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32176 if (d
->code
== fcode
)
32177 return ix86_expand_sse_comi (d
, exp
, target
);
32179 for (i
= 0, d
= bdesc_pcmpestr
;
32180 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32182 if (d
->code
== fcode
)
32183 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32185 for (i
= 0, d
= bdesc_pcmpistr
;
32186 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32188 if (d
->code
== fcode
)
32189 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32191 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32192 if (d
->code
== fcode
)
32193 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32194 (enum ix86_builtin_func_type
)
32195 d
->flag
, d
->comparison
);
32197 gcc_unreachable ();
32200 /* Returns a function decl for a vectorized version of the builtin function
32201 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32202 if it is not available. */
32205 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32208 enum machine_mode in_mode
, out_mode
;
32210 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32212 if (TREE_CODE (type_out
) != VECTOR_TYPE
32213 || TREE_CODE (type_in
) != VECTOR_TYPE
32214 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32217 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32218 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32219 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32220 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32224 case BUILT_IN_SQRT
:
32225 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32227 if (out_n
== 2 && in_n
== 2)
32228 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32229 else if (out_n
== 4 && in_n
== 4)
32230 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32234 case BUILT_IN_SQRTF
:
32235 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32237 if (out_n
== 4 && in_n
== 4)
32238 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32239 else if (out_n
== 8 && in_n
== 8)
32240 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32244 case BUILT_IN_IFLOOR
:
32245 case BUILT_IN_LFLOOR
:
32246 case BUILT_IN_LLFLOOR
:
32247 /* The round insn does not trap on denormals. */
32248 if (flag_trapping_math
|| !TARGET_ROUND
)
32251 if (out_mode
== SImode
&& in_mode
== DFmode
)
32253 if (out_n
== 4 && in_n
== 2)
32254 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32255 else if (out_n
== 8 && in_n
== 4)
32256 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32260 case BUILT_IN_IFLOORF
:
32261 case BUILT_IN_LFLOORF
:
32262 case BUILT_IN_LLFLOORF
:
32263 /* The round insn does not trap on denormals. */
32264 if (flag_trapping_math
|| !TARGET_ROUND
)
32267 if (out_mode
== SImode
&& in_mode
== SFmode
)
32269 if (out_n
== 4 && in_n
== 4)
32270 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32271 else if (out_n
== 8 && in_n
== 8)
32272 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32276 case BUILT_IN_ICEIL
:
32277 case BUILT_IN_LCEIL
:
32278 case BUILT_IN_LLCEIL
:
32279 /* The round insn does not trap on denormals. */
32280 if (flag_trapping_math
|| !TARGET_ROUND
)
32283 if (out_mode
== SImode
&& in_mode
== DFmode
)
32285 if (out_n
== 4 && in_n
== 2)
32286 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32287 else if (out_n
== 8 && in_n
== 4)
32288 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32292 case BUILT_IN_ICEILF
:
32293 case BUILT_IN_LCEILF
:
32294 case BUILT_IN_LLCEILF
:
32295 /* The round insn does not trap on denormals. */
32296 if (flag_trapping_math
|| !TARGET_ROUND
)
32299 if (out_mode
== SImode
&& in_mode
== SFmode
)
32301 if (out_n
== 4 && in_n
== 4)
32302 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32303 else if (out_n
== 8 && in_n
== 8)
32304 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
32308 case BUILT_IN_IRINT
:
32309 case BUILT_IN_LRINT
:
32310 case BUILT_IN_LLRINT
:
32311 if (out_mode
== SImode
&& in_mode
== DFmode
)
32313 if (out_n
== 4 && in_n
== 2)
32314 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
32315 else if (out_n
== 8 && in_n
== 4)
32316 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
32320 case BUILT_IN_IRINTF
:
32321 case BUILT_IN_LRINTF
:
32322 case BUILT_IN_LLRINTF
:
32323 if (out_mode
== SImode
&& in_mode
== SFmode
)
32325 if (out_n
== 4 && in_n
== 4)
32326 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
32327 else if (out_n
== 8 && in_n
== 8)
32328 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
32332 case BUILT_IN_IROUND
:
32333 case BUILT_IN_LROUND
:
32334 case BUILT_IN_LLROUND
:
32335 /* The round insn does not trap on denormals. */
32336 if (flag_trapping_math
|| !TARGET_ROUND
)
32339 if (out_mode
== SImode
&& in_mode
== DFmode
)
32341 if (out_n
== 4 && in_n
== 2)
32342 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
32343 else if (out_n
== 8 && in_n
== 4)
32344 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
32348 case BUILT_IN_IROUNDF
:
32349 case BUILT_IN_LROUNDF
:
32350 case BUILT_IN_LLROUNDF
:
32351 /* The round insn does not trap on denormals. */
32352 if (flag_trapping_math
|| !TARGET_ROUND
)
32355 if (out_mode
== SImode
&& in_mode
== SFmode
)
32357 if (out_n
== 4 && in_n
== 4)
32358 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
32359 else if (out_n
== 8 && in_n
== 8)
32360 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
32364 case BUILT_IN_COPYSIGN
:
32365 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32367 if (out_n
== 2 && in_n
== 2)
32368 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
32369 else if (out_n
== 4 && in_n
== 4)
32370 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
32374 case BUILT_IN_COPYSIGNF
:
32375 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32377 if (out_n
== 4 && in_n
== 4)
32378 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
32379 else if (out_n
== 8 && in_n
== 8)
32380 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
32384 case BUILT_IN_FLOOR
:
32385 /* The round insn does not trap on denormals. */
32386 if (flag_trapping_math
|| !TARGET_ROUND
)
32389 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32391 if (out_n
== 2 && in_n
== 2)
32392 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
32393 else if (out_n
== 4 && in_n
== 4)
32394 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
32398 case BUILT_IN_FLOORF
:
32399 /* The round insn does not trap on denormals. */
32400 if (flag_trapping_math
|| !TARGET_ROUND
)
32403 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32405 if (out_n
== 4 && in_n
== 4)
32406 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
32407 else if (out_n
== 8 && in_n
== 8)
32408 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
32412 case BUILT_IN_CEIL
:
32413 /* The round insn does not trap on denormals. */
32414 if (flag_trapping_math
|| !TARGET_ROUND
)
32417 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32419 if (out_n
== 2 && in_n
== 2)
32420 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
32421 else if (out_n
== 4 && in_n
== 4)
32422 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
32426 case BUILT_IN_CEILF
:
32427 /* The round insn does not trap on denormals. */
32428 if (flag_trapping_math
|| !TARGET_ROUND
)
32431 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32433 if (out_n
== 4 && in_n
== 4)
32434 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
32435 else if (out_n
== 8 && in_n
== 8)
32436 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
32440 case BUILT_IN_TRUNC
:
32441 /* The round insn does not trap on denormals. */
32442 if (flag_trapping_math
|| !TARGET_ROUND
)
32445 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32447 if (out_n
== 2 && in_n
== 2)
32448 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
32449 else if (out_n
== 4 && in_n
== 4)
32450 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
32454 case BUILT_IN_TRUNCF
:
32455 /* The round insn does not trap on denormals. */
32456 if (flag_trapping_math
|| !TARGET_ROUND
)
32459 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32461 if (out_n
== 4 && in_n
== 4)
32462 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
32463 else if (out_n
== 8 && in_n
== 8)
32464 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
32468 case BUILT_IN_RINT
:
32469 /* The round insn does not trap on denormals. */
32470 if (flag_trapping_math
|| !TARGET_ROUND
)
32473 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32475 if (out_n
== 2 && in_n
== 2)
32476 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
32477 else if (out_n
== 4 && in_n
== 4)
32478 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
32482 case BUILT_IN_RINTF
:
32483 /* The round insn does not trap on denormals. */
32484 if (flag_trapping_math
|| !TARGET_ROUND
)
32487 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32489 if (out_n
== 4 && in_n
== 4)
32490 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
32491 else if (out_n
== 8 && in_n
== 8)
32492 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
32496 case BUILT_IN_ROUND
:
32497 /* The round insn does not trap on denormals. */
32498 if (flag_trapping_math
|| !TARGET_ROUND
)
32501 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32503 if (out_n
== 2 && in_n
== 2)
32504 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
32505 else if (out_n
== 4 && in_n
== 4)
32506 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
32510 case BUILT_IN_ROUNDF
:
32511 /* The round insn does not trap on denormals. */
32512 if (flag_trapping_math
|| !TARGET_ROUND
)
32515 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32517 if (out_n
== 4 && in_n
== 4)
32518 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
32519 else if (out_n
== 8 && in_n
== 8)
32520 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
32525 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32527 if (out_n
== 2 && in_n
== 2)
32528 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
32529 if (out_n
== 4 && in_n
== 4)
32530 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
32534 case BUILT_IN_FMAF
:
32535 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32537 if (out_n
== 4 && in_n
== 4)
32538 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
32539 if (out_n
== 8 && in_n
== 8)
32540 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
32548 /* Dispatch to a handler for a vectorization library. */
32549 if (ix86_veclib_handler
)
32550 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
32556 /* Handler for an SVML-style interface to
32557 a library with vectorized intrinsics. */
32560 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
32563 tree fntype
, new_fndecl
, args
;
32566 enum machine_mode el_mode
, in_mode
;
32569 /* The SVML is suitable for unsafe math only. */
32570 if (!flag_unsafe_math_optimizations
)
32573 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32574 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32575 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32576 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32577 if (el_mode
!= in_mode
32585 case BUILT_IN_LOG10
:
32587 case BUILT_IN_TANH
:
32589 case BUILT_IN_ATAN
:
32590 case BUILT_IN_ATAN2
:
32591 case BUILT_IN_ATANH
:
32592 case BUILT_IN_CBRT
:
32593 case BUILT_IN_SINH
:
32595 case BUILT_IN_ASINH
:
32596 case BUILT_IN_ASIN
:
32597 case BUILT_IN_COSH
:
32599 case BUILT_IN_ACOSH
:
32600 case BUILT_IN_ACOS
:
32601 if (el_mode
!= DFmode
|| n
!= 2)
32605 case BUILT_IN_EXPF
:
32606 case BUILT_IN_LOGF
:
32607 case BUILT_IN_LOG10F
:
32608 case BUILT_IN_POWF
:
32609 case BUILT_IN_TANHF
:
32610 case BUILT_IN_TANF
:
32611 case BUILT_IN_ATANF
:
32612 case BUILT_IN_ATAN2F
:
32613 case BUILT_IN_ATANHF
:
32614 case BUILT_IN_CBRTF
:
32615 case BUILT_IN_SINHF
:
32616 case BUILT_IN_SINF
:
32617 case BUILT_IN_ASINHF
:
32618 case BUILT_IN_ASINF
:
32619 case BUILT_IN_COSHF
:
32620 case BUILT_IN_COSF
:
32621 case BUILT_IN_ACOSHF
:
32622 case BUILT_IN_ACOSF
:
32623 if (el_mode
!= SFmode
|| n
!= 4)
32631 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32633 if (fn
== BUILT_IN_LOGF
)
32634 strcpy (name
, "vmlsLn4");
32635 else if (fn
== BUILT_IN_LOG
)
32636 strcpy (name
, "vmldLn2");
32639 sprintf (name
, "vmls%s", bname
+10);
32640 name
[strlen (name
)-1] = '4';
32643 sprintf (name
, "vmld%s2", bname
+10);
32645 /* Convert to uppercase. */
32649 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32651 args
= TREE_CHAIN (args
))
32655 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32657 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32659 /* Build a function declaration for the vectorized function. */
32660 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32661 FUNCTION_DECL
, get_identifier (name
), fntype
);
32662 TREE_PUBLIC (new_fndecl
) = 1;
32663 DECL_EXTERNAL (new_fndecl
) = 1;
32664 DECL_IS_NOVOPS (new_fndecl
) = 1;
32665 TREE_READONLY (new_fndecl
) = 1;
32670 /* Handler for an ACML-style interface to
32671 a library with vectorized intrinsics. */
32674 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
32676 char name
[20] = "__vr.._";
32677 tree fntype
, new_fndecl
, args
;
32680 enum machine_mode el_mode
, in_mode
;
32683 /* The ACML is 64bits only and suitable for unsafe math only as
32684 it does not correctly support parts of IEEE with the required
32685 precision such as denormals. */
32687 || !flag_unsafe_math_optimizations
)
32690 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32691 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32692 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32693 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32694 if (el_mode
!= in_mode
32704 case BUILT_IN_LOG2
:
32705 case BUILT_IN_LOG10
:
32708 if (el_mode
!= DFmode
32713 case BUILT_IN_SINF
:
32714 case BUILT_IN_COSF
:
32715 case BUILT_IN_EXPF
:
32716 case BUILT_IN_POWF
:
32717 case BUILT_IN_LOGF
:
32718 case BUILT_IN_LOG2F
:
32719 case BUILT_IN_LOG10F
:
32722 if (el_mode
!= SFmode
32731 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32732 sprintf (name
+ 7, "%s", bname
+10);
32735 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32737 args
= TREE_CHAIN (args
))
32741 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32743 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32745 /* Build a function declaration for the vectorized function. */
32746 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32747 FUNCTION_DECL
, get_identifier (name
), fntype
);
32748 TREE_PUBLIC (new_fndecl
) = 1;
32749 DECL_EXTERNAL (new_fndecl
) = 1;
32750 DECL_IS_NOVOPS (new_fndecl
) = 1;
32751 TREE_READONLY (new_fndecl
) = 1;
32756 /* Returns a decl of a function that implements gather load with
32757 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
32758 Return NULL_TREE if it is not available. */
32761 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
32762 const_tree index_type
, int scale
)
32765 enum ix86_builtins code
;
32770 if ((TREE_CODE (index_type
) != INTEGER_TYPE
32771 && !POINTER_TYPE_P (index_type
))
32772 || (TYPE_MODE (index_type
) != SImode
32773 && TYPE_MODE (index_type
) != DImode
))
32776 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
32779 /* v*gather* insn sign extends index to pointer mode. */
32780 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
32781 && TYPE_UNSIGNED (index_type
))
32786 || (scale
& (scale
- 1)) != 0)
32789 si
= TYPE_MODE (index_type
) == SImode
;
32790 switch (TYPE_MODE (mem_vectype
))
32793 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
32796 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
32799 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
32802 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
32805 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
32808 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
32811 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
32814 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
32820 return ix86_builtins
[code
];
32823 /* Returns a code for a target-specific builtin that implements
32824 reciprocal of the function, or NULL_TREE if not available. */
32827 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
32828 bool sqrt ATTRIBUTE_UNUSED
)
32830 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
32831 && flag_finite_math_only
&& !flag_trapping_math
32832 && flag_unsafe_math_optimizations
))
32836 /* Machine dependent builtins. */
32839 /* Vectorized version of sqrt to rsqrt conversion. */
32840 case IX86_BUILTIN_SQRTPS_NR
:
32841 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
32843 case IX86_BUILTIN_SQRTPS_NR256
:
32844 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
32850 /* Normal builtins. */
32853 /* Sqrt to rsqrt conversion. */
32854 case BUILT_IN_SQRTF
:
32855 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
32862 /* Helper for avx_vpermilps256_operand et al. This is also used by
32863 the expansion functions to turn the parallel back into a mask.
32864 The return value is 0 for no match and the imm8+1 for a match. */
32867 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
32869 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
32871 unsigned char ipar
[8];
32873 if (XVECLEN (par
, 0) != (int) nelt
)
32876 /* Validate that all of the elements are constants, and not totally
32877 out of range. Copy the data into an integral array to make the
32878 subsequent checks easier. */
32879 for (i
= 0; i
< nelt
; ++i
)
32881 rtx er
= XVECEXP (par
, 0, i
);
32882 unsigned HOST_WIDE_INT ei
;
32884 if (!CONST_INT_P (er
))
32895 /* In the 256-bit DFmode case, we can only move elements within
32897 for (i
= 0; i
< 2; ++i
)
32901 mask
|= ipar
[i
] << i
;
32903 for (i
= 2; i
< 4; ++i
)
32907 mask
|= (ipar
[i
] - 2) << i
;
32912 /* In the 256-bit SFmode case, we have full freedom of movement
32913 within the low 128-bit lane, but the high 128-bit lane must
32914 mirror the exact same pattern. */
32915 for (i
= 0; i
< 4; ++i
)
32916 if (ipar
[i
] + 4 != ipar
[i
+ 4])
32923 /* In the 128-bit case, we've full freedom in the placement of
32924 the elements from the source operand. */
32925 for (i
= 0; i
< nelt
; ++i
)
32926 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
32930 gcc_unreachable ();
32933 /* Make sure success has a non-zero value by adding one. */
32937 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
32938 the expansion functions to turn the parallel back into a mask.
32939 The return value is 0 for no match and the imm8+1 for a match. */
32942 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
32944 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
32946 unsigned char ipar
[8];
32948 if (XVECLEN (par
, 0) != (int) nelt
)
32951 /* Validate that all of the elements are constants, and not totally
32952 out of range. Copy the data into an integral array to make the
32953 subsequent checks easier. */
32954 for (i
= 0; i
< nelt
; ++i
)
32956 rtx er
= XVECEXP (par
, 0, i
);
32957 unsigned HOST_WIDE_INT ei
;
32959 if (!CONST_INT_P (er
))
32962 if (ei
>= 2 * nelt
)
32967 /* Validate that the halves of the permute are halves. */
32968 for (i
= 0; i
< nelt2
- 1; ++i
)
32969 if (ipar
[i
] + 1 != ipar
[i
+ 1])
32971 for (i
= nelt2
; i
< nelt
- 1; ++i
)
32972 if (ipar
[i
] + 1 != ipar
[i
+ 1])
32975 /* Reconstruct the mask. */
32976 for (i
= 0; i
< 2; ++i
)
32978 unsigned e
= ipar
[i
* nelt2
];
32982 mask
|= e
<< (i
* 4);
32985 /* Make sure success has a non-zero value by adding one. */
32989 /* Store OPERAND to the memory after reload is completed. This means
32990 that we can't easily use assign_stack_local. */
32992 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
32996 gcc_assert (reload_completed
);
32997 if (ix86_using_red_zone ())
32999 result
= gen_rtx_MEM (mode
,
33000 gen_rtx_PLUS (Pmode
,
33002 GEN_INT (-RED_ZONE_SIZE
)));
33003 emit_move_insn (result
, operand
);
33005 else if (TARGET_64BIT
)
33011 operand
= gen_lowpart (DImode
, operand
);
33015 gen_rtx_SET (VOIDmode
,
33016 gen_rtx_MEM (DImode
,
33017 gen_rtx_PRE_DEC (DImode
,
33018 stack_pointer_rtx
)),
33022 gcc_unreachable ();
33024 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33033 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33035 gen_rtx_SET (VOIDmode
,
33036 gen_rtx_MEM (SImode
,
33037 gen_rtx_PRE_DEC (Pmode
,
33038 stack_pointer_rtx
)),
33041 gen_rtx_SET (VOIDmode
,
33042 gen_rtx_MEM (SImode
,
33043 gen_rtx_PRE_DEC (Pmode
,
33044 stack_pointer_rtx
)),
33049 /* Store HImodes as SImodes. */
33050 operand
= gen_lowpart (SImode
, operand
);
33054 gen_rtx_SET (VOIDmode
,
33055 gen_rtx_MEM (GET_MODE (operand
),
33056 gen_rtx_PRE_DEC (SImode
,
33057 stack_pointer_rtx
)),
33061 gcc_unreachable ();
33063 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33068 /* Free operand from the memory. */
33070 ix86_free_from_memory (enum machine_mode mode
)
33072 if (!ix86_using_red_zone ())
33076 if (mode
== DImode
|| TARGET_64BIT
)
33080 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33081 to pop or add instruction if registers are available. */
33082 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33083 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33088 /* Return a register priority for hard reg REGNO. */
33090 ix86_register_priority (int hard_regno
)
33092 /* ebp and r13 as the base always wants a displacement, r12 as the
33093 base always wants an index. So discourage their usage in an
33095 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33097 if (hard_regno
== BP_REG
)
33099 /* New x86-64 int registers result in bigger code size. Discourage
33101 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33103 /* New x86-64 SSE registers result in bigger code size. Discourage
33105 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33107 /* Usage of AX register results in smaller code. Prefer it. */
33108 if (hard_regno
== 0)
33113 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33115 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33116 QImode must go into class Q_REGS.
33117 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33118 movdf to do mem-to-mem moves through integer regs. */
33121 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33123 enum machine_mode mode
= GET_MODE (x
);
33125 /* We're only allowed to return a subclass of CLASS. Many of the
33126 following checks fail for NO_REGS, so eliminate that early. */
33127 if (regclass
== NO_REGS
)
33130 /* All classes can load zeros. */
33131 if (x
== CONST0_RTX (mode
))
33134 /* Force constants into memory if we are loading a (nonzero) constant into
33135 an MMX or SSE register. This is because there are no MMX/SSE instructions
33136 to load from a constant. */
33138 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
33141 /* Prefer SSE regs only, if we can use them for math. */
33142 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33143 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33145 /* Floating-point constants need more complex checks. */
33146 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33148 /* General regs can load everything. */
33149 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33152 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33153 zero above. We only want to wind up preferring 80387 registers if
33154 we plan on doing computation with them. */
33156 && standard_80387_constant_p (x
) > 0)
33158 /* Limit class to non-sse. */
33159 if (regclass
== FLOAT_SSE_REGS
)
33161 if (regclass
== FP_TOP_SSE_REGS
)
33163 if (regclass
== FP_SECOND_SSE_REGS
)
33164 return FP_SECOND_REG
;
33165 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33172 /* Generally when we see PLUS here, it's the function invariant
33173 (plus soft-fp const_int). Which can only be computed into general
33175 if (GET_CODE (x
) == PLUS
)
33176 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33178 /* QImode constants are easy to load, but non-constant QImode data
33179 must go into Q_REGS. */
33180 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33182 if (reg_class_subset_p (regclass
, Q_REGS
))
33184 if (reg_class_subset_p (Q_REGS
, regclass
))
33192 /* Discourage putting floating-point values in SSE registers unless
33193 SSE math is being used, and likewise for the 387 registers. */
33195 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33197 enum machine_mode mode
= GET_MODE (x
);
33199 /* Restrict the output reload class to the register bank that we are doing
33200 math on. If we would like not to return a subset of CLASS, reject this
33201 alternative: if reload cannot do this, it will still use its choice. */
33202 mode
= GET_MODE (x
);
33203 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33204 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
33206 if (X87_FLOAT_MODE_P (mode
))
33208 if (regclass
== FP_TOP_SSE_REGS
)
33210 else if (regclass
== FP_SECOND_SSE_REGS
)
33211 return FP_SECOND_REG
;
33213 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33220 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33221 enum machine_mode mode
, secondary_reload_info
*sri
)
33223 /* Double-word spills from general registers to non-offsettable memory
33224 references (zero-extended addresses) require special handling. */
33227 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33228 && rclass
== GENERAL_REGS
33229 && !offsettable_memref_p (x
))
33232 ? CODE_FOR_reload_noff_load
33233 : CODE_FOR_reload_noff_store
);
33234 /* Add the cost of moving address to a temporary. */
33235 sri
->extra_cost
= 1;
33240 /* QImode spills from non-QI registers require
33241 intermediate register on 32bit targets. */
33243 && !in_p
&& mode
== QImode
33244 && (rclass
== GENERAL_REGS
33245 || rclass
== LEGACY_REGS
33246 || rclass
== NON_Q_REGS
33249 || rclass
== INDEX_REGS
))
33258 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33259 regno
= true_regnum (x
);
33261 /* Return Q_REGS if the operand is in memory. */
33266 /* This condition handles corner case where an expression involving
33267 pointers gets vectorized. We're trying to use the address of a
33268 stack slot as a vector initializer.
33270 (set (reg:V2DI 74 [ vect_cst_.2 ])
33271 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33273 Eventually frame gets turned into sp+offset like this:
33275 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33276 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33277 (const_int 392 [0x188]))))
33279 That later gets turned into:
33281 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33282 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33283 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33285 We'll have the following reload recorded:
33287 Reload 0: reload_in (DI) =
33288 (plus:DI (reg/f:DI 7 sp)
33289 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33290 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33291 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33292 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33293 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33294 reload_reg_rtx: (reg:V2DI 22 xmm1)
33296 Which isn't going to work since SSE instructions can't handle scalar
33297 additions. Returning GENERAL_REGS forces the addition into integer
33298 register and reload can handle subsequent reloads without problems. */
33300 if (in_p
&& GET_CODE (x
) == PLUS
33301 && SSE_CLASS_P (rclass
)
33302 && SCALAR_INT_MODE_P (mode
))
33303 return GENERAL_REGS
;
33308 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33311 ix86_class_likely_spilled_p (reg_class_t rclass
)
33322 case SSE_FIRST_REG
:
33324 case FP_SECOND_REG
:
33334 /* If we are copying between general and FP registers, we need a memory
33335 location. The same is true for SSE and MMX registers.
33337 To optimize register_move_cost performance, allow inline variant.
33339 The macro can't work reliably when one of the CLASSES is class containing
33340 registers from multiple units (SSE, MMX, integer). We avoid this by never
33341 combining those units in single alternative in the machine description.
33342 Ensure that this constraint holds to avoid unexpected surprises.
33344 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33345 enforce these sanity checks. */
33348 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33349 enum machine_mode mode
, int strict
)
33351 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
33352 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
33353 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
33354 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
33355 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
33356 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
33358 gcc_assert (!strict
|| lra_in_progress
);
33362 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
33365 /* ??? This is a lie. We do have moves between mmx/general, and for
33366 mmx/sse2. But by saying we need secondary memory we discourage the
33367 register allocator from using the mmx registers unless needed. */
33368 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
33371 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33373 /* SSE1 doesn't have any direct moves from other classes. */
33377 /* If the target says that inter-unit moves are more expensive
33378 than moving through memory, then don't generate them. */
33379 if (!TARGET_INTER_UNIT_MOVES
)
33382 /* Between SSE and general, we have moves no larger than word size. */
33383 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33391 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33392 enum machine_mode mode
, int strict
)
33394 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
33397 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33399 On the 80386, this is the size of MODE in words,
33400 except in the FP regs, where a single reg is always enough. */
33402 static unsigned char
33403 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
33405 if (MAYBE_INTEGER_CLASS_P (rclass
))
33407 if (mode
== XFmode
)
33408 return (TARGET_64BIT
? 2 : 3);
33409 else if (mode
== XCmode
)
33410 return (TARGET_64BIT
? 4 : 6);
33412 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
33416 if (COMPLEX_MODE_P (mode
))
33423 /* Return true if the registers in CLASS cannot represent the change from
33424 modes FROM to TO. */
33427 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
33428 enum reg_class regclass
)
33433 /* x87 registers can't do subreg at all, as all values are reformatted
33434 to extended precision. */
33435 if (MAYBE_FLOAT_CLASS_P (regclass
))
33438 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
33440 /* Vector registers do not support QI or HImode loads. If we don't
33441 disallow a change to these modes, reload will assume it's ok to
33442 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33443 the vec_dupv4hi pattern. */
33444 if (GET_MODE_SIZE (from
) < 4)
33447 /* Vector registers do not support subreg with nonzero offsets, which
33448 are otherwise valid for integer registers. Since we can't see
33449 whether we have a nonzero offset from here, prohibit all
33450 nonparadoxical subregs changing size. */
33451 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
33458 /* Return the cost of moving data of mode M between a
33459 register and memory. A value of 2 is the default; this cost is
33460 relative to those in `REGISTER_MOVE_COST'.
33462 This function is used extensively by register_move_cost that is used to
33463 build tables at startup. Make it inline in this case.
33464 When IN is 2, return maximum of in and out move cost.
33466 If moving between registers and memory is more expensive than
33467 between two registers, you should define this macro to express the
33470 Model also increased moving costs of QImode registers in non
33474 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
33478 if (FLOAT_CLASS_P (regclass
))
33496 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
33497 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
33499 if (SSE_CLASS_P (regclass
))
33502 switch (GET_MODE_SIZE (mode
))
33517 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
33518 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
33520 if (MMX_CLASS_P (regclass
))
33523 switch (GET_MODE_SIZE (mode
))
33535 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
33536 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
33538 switch (GET_MODE_SIZE (mode
))
33541 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
33544 return ix86_cost
->int_store
[0];
33545 if (TARGET_PARTIAL_REG_DEPENDENCY
33546 && optimize_function_for_speed_p (cfun
))
33547 cost
= ix86_cost
->movzbl_load
;
33549 cost
= ix86_cost
->int_load
[0];
33551 return MAX (cost
, ix86_cost
->int_store
[0]);
33557 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
33559 return ix86_cost
->movzbl_load
;
33561 return ix86_cost
->int_store
[0] + 4;
33566 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
33567 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
33569 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
33570 if (mode
== TFmode
)
33573 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
33575 cost
= ix86_cost
->int_load
[2];
33577 cost
= ix86_cost
->int_store
[2];
33578 return (cost
* (((int) GET_MODE_SIZE (mode
)
33579 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
33584 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
33587 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
33591 /* Return the cost of moving data from a register in class CLASS1 to
33592 one in class CLASS2.
33594 It is not required that the cost always equal 2 when FROM is the same as TO;
33595 on some machines it is expensive to move between registers if they are not
33596 general registers. */
33599 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
33600 reg_class_t class2_i
)
33602 enum reg_class class1
= (enum reg_class
) class1_i
;
33603 enum reg_class class2
= (enum reg_class
) class2_i
;
33605 /* In case we require secondary memory, compute cost of the store followed
33606 by load. In order to avoid bad register allocation choices, we need
33607 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33609 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
33613 cost
+= inline_memory_move_cost (mode
, class1
, 2);
33614 cost
+= inline_memory_move_cost (mode
, class2
, 2);
33616 /* In case of copying from general_purpose_register we may emit multiple
33617 stores followed by single load causing memory size mismatch stall.
33618 Count this as arbitrarily high cost of 20. */
33619 if (targetm
.class_max_nregs (class1
, mode
)
33620 > targetm
.class_max_nregs (class2
, mode
))
33623 /* In the case of FP/MMX moves, the registers actually overlap, and we
33624 have to switch modes in order to treat them differently. */
33625 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
33626 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
33632 /* Moves between SSE/MMX and integer unit are expensive. */
33633 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
33634 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33636 /* ??? By keeping returned value relatively high, we limit the number
33637 of moves between integer and MMX/SSE registers for all targets.
33638 Additionally, high value prevents problem with x86_modes_tieable_p(),
33639 where integer modes in MMX/SSE registers are not tieable
33640 because of missing QImode and HImode moves to, from or between
33641 MMX/SSE registers. */
33642 return MAX (8, ix86_cost
->mmxsse_to_integer
);
33644 if (MAYBE_FLOAT_CLASS_P (class1
))
33645 return ix86_cost
->fp_move
;
33646 if (MAYBE_SSE_CLASS_P (class1
))
33647 return ix86_cost
->sse_move
;
33648 if (MAYBE_MMX_CLASS_P (class1
))
33649 return ix86_cost
->mmx_move
;
33653 /* Return TRUE if hard register REGNO can hold a value of machine-mode
33657 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
33659 /* Flags and only flags can only hold CCmode values. */
33660 if (CC_REGNO_P (regno
))
33661 return GET_MODE_CLASS (mode
) == MODE_CC
;
33662 if (GET_MODE_CLASS (mode
) == MODE_CC
33663 || GET_MODE_CLASS (mode
) == MODE_RANDOM
33664 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
33666 if (STACK_REGNO_P (regno
))
33667 return VALID_FP_MODE_P (mode
);
33668 if (SSE_REGNO_P (regno
))
33670 /* We implement the move patterns for all vector modes into and
33671 out of SSE registers, even when no operation instructions
33672 are available. OImode move is available only when AVX is
33674 return ((TARGET_AVX
&& mode
== OImode
)
33675 || VALID_AVX256_REG_MODE (mode
)
33676 || VALID_SSE_REG_MODE (mode
)
33677 || VALID_SSE2_REG_MODE (mode
)
33678 || VALID_MMX_REG_MODE (mode
)
33679 || VALID_MMX_REG_MODE_3DNOW (mode
));
33681 if (MMX_REGNO_P (regno
))
33683 /* We implement the move patterns for 3DNOW modes even in MMX mode,
33684 so if the register is available at all, then we can move data of
33685 the given mode into or out of it. */
33686 return (VALID_MMX_REG_MODE (mode
)
33687 || VALID_MMX_REG_MODE_3DNOW (mode
));
33690 if (mode
== QImode
)
33692 /* Take care for QImode values - they can be in non-QI regs,
33693 but then they do cause partial register stalls. */
33694 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
33696 if (!TARGET_PARTIAL_REG_STALL
)
33698 return !can_create_pseudo_p ();
33700 /* We handle both integer and floats in the general purpose registers. */
33701 else if (VALID_INT_MODE_P (mode
))
33703 else if (VALID_FP_MODE_P (mode
))
33705 else if (VALID_DFP_MODE_P (mode
))
33707 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
33708 on to use that value in smaller contexts, this can easily force a
33709 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
33710 supporting DImode, allow it. */
33711 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
33717 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
33718 tieable integer mode. */
33721 ix86_tieable_integer_mode_p (enum machine_mode mode
)
33730 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
33733 return TARGET_64BIT
;
33740 /* Return true if MODE1 is accessible in a register that can hold MODE2
33741 without copying. That is, all register classes that can hold MODE2
33742 can also hold MODE1. */
33745 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
33747 if (mode1
== mode2
)
33750 if (ix86_tieable_integer_mode_p (mode1
)
33751 && ix86_tieable_integer_mode_p (mode2
))
33754 /* MODE2 being XFmode implies fp stack or general regs, which means we
33755 can tie any smaller floating point modes to it. Note that we do not
33756 tie this with TFmode. */
33757 if (mode2
== XFmode
)
33758 return mode1
== SFmode
|| mode1
== DFmode
;
33760 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
33761 that we can tie it with SFmode. */
33762 if (mode2
== DFmode
)
33763 return mode1
== SFmode
;
33765 /* If MODE2 is only appropriate for an SSE register, then tie with
33766 any other mode acceptable to SSE registers. */
33767 if (GET_MODE_SIZE (mode2
) == 32
33768 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33769 return (GET_MODE_SIZE (mode1
) == 32
33770 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33771 if (GET_MODE_SIZE (mode2
) == 16
33772 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33773 return (GET_MODE_SIZE (mode1
) == 16
33774 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33776 /* If MODE2 is appropriate for an MMX register, then tie
33777 with any other mode acceptable to MMX registers. */
33778 if (GET_MODE_SIZE (mode2
) == 8
33779 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
33780 return (GET_MODE_SIZE (mode1
) == 8
33781 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
33786 /* Return the cost of moving between two registers of mode MODE. */
33789 ix86_set_reg_reg_cost (enum machine_mode mode
)
33791 unsigned int units
= UNITS_PER_WORD
;
33793 switch (GET_MODE_CLASS (mode
))
33799 units
= GET_MODE_SIZE (CCmode
);
33803 if ((TARGET_SSE
&& mode
== TFmode
)
33804 || (TARGET_80387
&& mode
== XFmode
)
33805 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
33806 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
33807 units
= GET_MODE_SIZE (mode
);
33810 case MODE_COMPLEX_FLOAT
:
33811 if ((TARGET_SSE
&& mode
== TCmode
)
33812 || (TARGET_80387
&& mode
== XCmode
)
33813 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
33814 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
33815 units
= GET_MODE_SIZE (mode
);
33818 case MODE_VECTOR_INT
:
33819 case MODE_VECTOR_FLOAT
:
33820 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
33821 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
33822 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
33823 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
33824 units
= GET_MODE_SIZE (mode
);
33827 /* Return the cost of moving between two registers of mode MODE,
33828 assuming that the move will be in pieces of at most UNITS bytes. */
33829 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
33832 /* Compute a (partial) cost for rtx X. Return true if the complete
33833 cost has been computed, and false if subexpressions should be
33834 scanned. In either case, *TOTAL contains the cost result. */
33837 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
33840 enum rtx_code code
= (enum rtx_code
) code_i
;
33841 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
33842 enum machine_mode mode
= GET_MODE (x
);
33843 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
33848 if (register_operand (SET_DEST (x
), VOIDmode
)
33849 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
33851 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
33860 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
33862 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
33864 else if (flag_pic
&& SYMBOLIC_CONST (x
)
33866 || (!GET_CODE (x
) != LABEL_REF
33867 && (GET_CODE (x
) != SYMBOL_REF
33868 || !SYMBOL_REF_LOCAL_P (x
)))))
33875 if (mode
== VOIDmode
)
33880 switch (standard_80387_constant_p (x
))
33885 default: /* Other constants */
33892 if (SSE_FLOAT_MODE_P (mode
))
33895 switch (standard_sse_constant_p (x
))
33899 case 1: /* 0: xor eliminates false dependency */
33902 default: /* -1: cmp contains false dependency */
33907 /* Fall back to (MEM (SYMBOL_REF)), since that's where
33908 it'll probably end up. Add a penalty for size. */
33909 *total
= (COSTS_N_INSNS (1)
33910 + (flag_pic
!= 0 && !TARGET_64BIT
)
33911 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
33915 /* The zero extensions is often completely free on x86_64, so make
33916 it as cheap as possible. */
33917 if (TARGET_64BIT
&& mode
== DImode
33918 && GET_MODE (XEXP (x
, 0)) == SImode
)
33920 else if (TARGET_ZERO_EXTEND_WITH_AND
)
33921 *total
= cost
->add
;
33923 *total
= cost
->movzx
;
33927 *total
= cost
->movsx
;
33931 if (SCALAR_INT_MODE_P (mode
)
33932 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
33933 && CONST_INT_P (XEXP (x
, 1)))
33935 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
33938 *total
= cost
->add
;
33941 if ((value
== 2 || value
== 3)
33942 && cost
->lea
<= cost
->shift_const
)
33944 *total
= cost
->lea
;
33954 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
33956 /* ??? Should be SSE vector operation cost. */
33957 /* At least for published AMD latencies, this really is the same
33958 as the latency for a simple fpu operation like fabs. */
33959 /* V*QImode is emulated with 1-11 insns. */
33960 if (mode
== V16QImode
|| mode
== V32QImode
)
33963 if (TARGET_XOP
&& mode
== V16QImode
)
33965 /* For XOP we use vpshab, which requires a broadcast of the
33966 value to the variable shift insn. For constants this
33967 means a V16Q const in mem; even when we can perform the
33968 shift with one insn set the cost to prefer paddb. */
33969 if (CONSTANT_P (XEXP (x
, 1)))
33971 *total
= (cost
->fabs
33972 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
33973 + (speed
? 2 : COSTS_N_BYTES (16)));
33978 else if (TARGET_SSSE3
)
33980 *total
= cost
->fabs
* count
;
33983 *total
= cost
->fabs
;
33985 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33987 if (CONST_INT_P (XEXP (x
, 1)))
33989 if (INTVAL (XEXP (x
, 1)) > 32)
33990 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
33992 *total
= cost
->shift_const
* 2;
33996 if (GET_CODE (XEXP (x
, 1)) == AND
)
33997 *total
= cost
->shift_var
* 2;
33999 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34004 if (CONST_INT_P (XEXP (x
, 1)))
34005 *total
= cost
->shift_const
;
34007 *total
= cost
->shift_var
;
34015 gcc_assert (FLOAT_MODE_P (mode
));
34016 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34018 /* ??? SSE scalar/vector cost should be used here. */
34019 /* ??? Bald assumption that fma has the same cost as fmul. */
34020 *total
= cost
->fmul
;
34021 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34023 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34025 if (GET_CODE (sub
) == NEG
)
34026 sub
= XEXP (sub
, 0);
34027 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34030 if (GET_CODE (sub
) == NEG
)
34031 sub
= XEXP (sub
, 0);
34032 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34037 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34039 /* ??? SSE scalar cost should be used here. */
34040 *total
= cost
->fmul
;
34043 else if (X87_FLOAT_MODE_P (mode
))
34045 *total
= cost
->fmul
;
34048 else if (FLOAT_MODE_P (mode
))
34050 /* ??? SSE vector cost should be used here. */
34051 *total
= cost
->fmul
;
34054 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34056 /* V*QImode is emulated with 7-13 insns. */
34057 if (mode
== V16QImode
|| mode
== V32QImode
)
34060 if (TARGET_XOP
&& mode
== V16QImode
)
34062 else if (TARGET_SSSE3
)
34064 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34066 /* V*DImode is emulated with 5-8 insns. */
34067 else if (mode
== V2DImode
|| mode
== V4DImode
)
34069 if (TARGET_XOP
&& mode
== V2DImode
)
34070 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34072 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34074 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34075 insns, including two PMULUDQ. */
34076 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34077 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34079 *total
= cost
->fmul
;
34084 rtx op0
= XEXP (x
, 0);
34085 rtx op1
= XEXP (x
, 1);
34087 if (CONST_INT_P (XEXP (x
, 1)))
34089 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34090 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34094 /* This is arbitrary. */
34097 /* Compute costs correctly for widening multiplication. */
34098 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34099 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34100 == GET_MODE_SIZE (mode
))
34102 int is_mulwiden
= 0;
34103 enum machine_mode inner_mode
= GET_MODE (op0
);
34105 if (GET_CODE (op0
) == GET_CODE (op1
))
34106 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34107 else if (CONST_INT_P (op1
))
34109 if (GET_CODE (op0
) == SIGN_EXTEND
)
34110 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34113 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34117 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34120 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34121 + nbits
* cost
->mult_bit
34122 + rtx_cost (op0
, outer_code
, opno
, speed
)
34123 + rtx_cost (op1
, outer_code
, opno
, speed
));
34132 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34133 /* ??? SSE cost should be used here. */
34134 *total
= cost
->fdiv
;
34135 else if (X87_FLOAT_MODE_P (mode
))
34136 *total
= cost
->fdiv
;
34137 else if (FLOAT_MODE_P (mode
))
34138 /* ??? SSE vector cost should be used here. */
34139 *total
= cost
->fdiv
;
34141 *total
= cost
->divide
[MODE_INDEX (mode
)];
34145 if (GET_MODE_CLASS (mode
) == MODE_INT
34146 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34148 if (GET_CODE (XEXP (x
, 0)) == PLUS
34149 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34150 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34151 && CONSTANT_P (XEXP (x
, 1)))
34153 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34154 if (val
== 2 || val
== 4 || val
== 8)
34156 *total
= cost
->lea
;
34157 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34158 outer_code
, opno
, speed
);
34159 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34160 outer_code
, opno
, speed
);
34161 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34165 else if (GET_CODE (XEXP (x
, 0)) == MULT
34166 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34168 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34169 if (val
== 2 || val
== 4 || val
== 8)
34171 *total
= cost
->lea
;
34172 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34173 outer_code
, opno
, speed
);
34174 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34178 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34180 *total
= cost
->lea
;
34181 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34182 outer_code
, opno
, speed
);
34183 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34184 outer_code
, opno
, speed
);
34185 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34192 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34194 /* ??? SSE cost should be used here. */
34195 *total
= cost
->fadd
;
34198 else if (X87_FLOAT_MODE_P (mode
))
34200 *total
= cost
->fadd
;
34203 else if (FLOAT_MODE_P (mode
))
34205 /* ??? SSE vector cost should be used here. */
34206 *total
= cost
->fadd
;
34214 if (GET_MODE_CLASS (mode
) == MODE_INT
34215 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34217 *total
= (cost
->add
* 2
34218 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34219 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34220 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34221 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
34227 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34229 /* ??? SSE cost should be used here. */
34230 *total
= cost
->fchs
;
34233 else if (X87_FLOAT_MODE_P (mode
))
34235 *total
= cost
->fchs
;
34238 else if (FLOAT_MODE_P (mode
))
34240 /* ??? SSE vector cost should be used here. */
34241 *total
= cost
->fchs
;
34247 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34249 /* ??? Should be SSE vector operation cost. */
34250 /* At least for published AMD latencies, this really is the same
34251 as the latency for a simple fpu operation like fabs. */
34252 *total
= cost
->fabs
;
34254 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34255 *total
= cost
->add
* 2;
34257 *total
= cost
->add
;
34261 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34262 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34263 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34264 && XEXP (x
, 1) == const0_rtx
)
34266 /* This kind of construct is implemented using test[bwl].
34267 Treat it as if we had an AND. */
34268 *total
= (cost
->add
34269 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34270 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
34276 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
34281 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34282 /* ??? SSE cost should be used here. */
34283 *total
= cost
->fabs
;
34284 else if (X87_FLOAT_MODE_P (mode
))
34285 *total
= cost
->fabs
;
34286 else if (FLOAT_MODE_P (mode
))
34287 /* ??? SSE vector cost should be used here. */
34288 *total
= cost
->fabs
;
34292 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34293 /* ??? SSE cost should be used here. */
34294 *total
= cost
->fsqrt
;
34295 else if (X87_FLOAT_MODE_P (mode
))
34296 *total
= cost
->fsqrt
;
34297 else if (FLOAT_MODE_P (mode
))
34298 /* ??? SSE vector cost should be used here. */
34299 *total
= cost
->fsqrt
;
34303 if (XINT (x
, 1) == UNSPEC_TP
)
34310 case VEC_DUPLICATE
:
34311 /* ??? Assume all of these vector manipulation patterns are
34312 recognizable. In which case they all pretty much have the
34314 *total
= cost
->fabs
;
34324 static int current_machopic_label_num
;
34326 /* Given a symbol name and its associated stub, write out the
34327 definition of the stub. */
34330 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
34332 unsigned int length
;
34333 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
34334 int label
= ++current_machopic_label_num
;
34336 /* For 64-bit we shouldn't get here. */
34337 gcc_assert (!TARGET_64BIT
);
34339 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34340 symb
= targetm
.strip_name_encoding (symb
);
34342 length
= strlen (stub
);
34343 binder_name
= XALLOCAVEC (char, length
+ 32);
34344 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
34346 length
= strlen (symb
);
34347 symbol_name
= XALLOCAVEC (char, length
+ 32);
34348 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
34350 sprintf (lazy_ptr_name
, "L%d$lz", label
);
34352 if (MACHOPIC_ATT_STUB
)
34353 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
34354 else if (MACHOPIC_PURE
)
34355 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
34357 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
34359 fprintf (file
, "%s:\n", stub
);
34360 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34362 if (MACHOPIC_ATT_STUB
)
34364 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34366 else if (MACHOPIC_PURE
)
34369 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34370 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
34371 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34372 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34373 label
, lazy_ptr_name
, label
);
34374 fprintf (file
, "\tjmp\t*%%ecx\n");
34377 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
34379 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34380 it needs no stub-binding-helper. */
34381 if (MACHOPIC_ATT_STUB
)
34384 fprintf (file
, "%s:\n", binder_name
);
34388 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
34389 fprintf (file
, "\tpushl\t%%ecx\n");
34392 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
34394 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
34396 /* N.B. Keep the correspondence of these
34397 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34398 old-pic/new-pic/non-pic stubs; altering this will break
34399 compatibility with existing dylibs. */
34402 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34403 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
34406 /* 16-byte -mdynamic-no-pic stub. */
34407 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
34409 fprintf (file
, "%s:\n", lazy_ptr_name
);
34410 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34411 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
34413 #endif /* TARGET_MACHO */
34415 /* Order the registers for register allocator. */
34418 x86_order_regs_for_local_alloc (void)
34423 /* First allocate the local general purpose registers. */
34424 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34425 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
34426 reg_alloc_order
[pos
++] = i
;
34428 /* Global general purpose registers. */
34429 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34430 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
34431 reg_alloc_order
[pos
++] = i
;
34433 /* x87 registers come first in case we are doing FP math
34435 if (!TARGET_SSE_MATH
)
34436 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34437 reg_alloc_order
[pos
++] = i
;
34439 /* SSE registers. */
34440 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
34441 reg_alloc_order
[pos
++] = i
;
34442 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
34443 reg_alloc_order
[pos
++] = i
;
34445 /* x87 registers. */
34446 if (TARGET_SSE_MATH
)
34447 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34448 reg_alloc_order
[pos
++] = i
;
34450 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
34451 reg_alloc_order
[pos
++] = i
;
34453 /* Initialize the rest of array as we do not allocate some registers
34455 while (pos
< FIRST_PSEUDO_REGISTER
)
34456 reg_alloc_order
[pos
++] = 0;
34459 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34460 in struct attribute_spec handler. */
34462 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
34464 int flags ATTRIBUTE_UNUSED
,
34465 bool *no_add_attrs
)
34467 if (TREE_CODE (*node
) != FUNCTION_TYPE
34468 && TREE_CODE (*node
) != METHOD_TYPE
34469 && TREE_CODE (*node
) != FIELD_DECL
34470 && TREE_CODE (*node
) != TYPE_DECL
)
34472 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34474 *no_add_attrs
= true;
34479 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
34481 *no_add_attrs
= true;
34484 if (is_attribute_p ("callee_pop_aggregate_return", name
))
34488 cst
= TREE_VALUE (args
);
34489 if (TREE_CODE (cst
) != INTEGER_CST
)
34491 warning (OPT_Wattributes
,
34492 "%qE attribute requires an integer constant argument",
34494 *no_add_attrs
= true;
34496 else if (compare_tree_int (cst
, 0) != 0
34497 && compare_tree_int (cst
, 1) != 0)
34499 warning (OPT_Wattributes
,
34500 "argument to %qE attribute is neither zero, nor one",
34502 *no_add_attrs
= true;
34511 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34512 struct attribute_spec.handler. */
34514 ix86_handle_abi_attribute (tree
*node
, tree name
,
34515 tree args ATTRIBUTE_UNUSED
,
34516 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34518 if (TREE_CODE (*node
) != FUNCTION_TYPE
34519 && TREE_CODE (*node
) != METHOD_TYPE
34520 && TREE_CODE (*node
) != FIELD_DECL
34521 && TREE_CODE (*node
) != TYPE_DECL
)
34523 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34525 *no_add_attrs
= true;
34529 /* Can combine regparm with all attributes but fastcall. */
34530 if (is_attribute_p ("ms_abi", name
))
34532 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
34534 error ("ms_abi and sysv_abi attributes are not compatible");
34539 else if (is_attribute_p ("sysv_abi", name
))
34541 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
34543 error ("ms_abi and sysv_abi attributes are not compatible");
34552 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34553 struct attribute_spec.handler. */
34555 ix86_handle_struct_attribute (tree
*node
, tree name
,
34556 tree args ATTRIBUTE_UNUSED
,
34557 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34560 if (DECL_P (*node
))
34562 if (TREE_CODE (*node
) == TYPE_DECL
)
34563 type
= &TREE_TYPE (*node
);
34568 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
34570 warning (OPT_Wattributes
, "%qE attribute ignored",
34572 *no_add_attrs
= true;
34575 else if ((is_attribute_p ("ms_struct", name
)
34576 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
34577 || ((is_attribute_p ("gcc_struct", name
)
34578 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
34580 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
34582 *no_add_attrs
= true;
34589 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
34590 tree args ATTRIBUTE_UNUSED
,
34591 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34593 if (TREE_CODE (*node
) != FUNCTION_DECL
)
34595 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34597 *no_add_attrs
= true;
34603 ix86_ms_bitfield_layout_p (const_tree record_type
)
34605 return ((TARGET_MS_BITFIELD_LAYOUT
34606 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
34607 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
34610 /* Returns an expression indicating where the this parameter is
34611 located on entry to the FUNCTION. */
34614 x86_this_parameter (tree function
)
34616 tree type
= TREE_TYPE (function
);
34617 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
34622 const int *parm_regs
;
34624 if (ix86_function_type_abi (type
) == MS_ABI
)
34625 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
34627 parm_regs
= x86_64_int_parameter_registers
;
34628 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
34631 nregs
= ix86_function_regparm (type
, function
);
34633 if (nregs
> 0 && !stdarg_p (type
))
34636 unsigned int ccvt
= ix86_get_callcvt (type
);
34638 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34639 regno
= aggr
? DX_REG
: CX_REG
;
34640 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34644 return gen_rtx_MEM (SImode
,
34645 plus_constant (Pmode
, stack_pointer_rtx
, 4));
34654 return gen_rtx_MEM (SImode
,
34655 plus_constant (Pmode
,
34656 stack_pointer_rtx
, 4));
34659 return gen_rtx_REG (SImode
, regno
);
34662 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
34666 /* Determine whether x86_output_mi_thunk can succeed. */
34669 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
34670 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
34671 HOST_WIDE_INT vcall_offset
, const_tree function
)
34673 /* 64-bit can handle anything. */
34677 /* For 32-bit, everything's fine if we have one free register. */
34678 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
34681 /* Need a free register for vcall_offset. */
34685 /* Need a free register for GOT references. */
34686 if (flag_pic
&& !targetm
.binds_local_p (function
))
34689 /* Otherwise ok. */
34693 /* Output the assembler code for a thunk function. THUNK_DECL is the
34694 declaration for the thunk function itself, FUNCTION is the decl for
34695 the target function. DELTA is an immediate constant offset to be
34696 added to THIS. If VCALL_OFFSET is nonzero, the word at
34697 *(*this + vcall_offset) should be added to THIS. */
34700 x86_output_mi_thunk (FILE *file
,
34701 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
34702 HOST_WIDE_INT vcall_offset
, tree function
)
34704 rtx this_param
= x86_this_parameter (function
);
34705 rtx this_reg
, tmp
, fnaddr
;
34706 unsigned int tmp_regno
;
34709 tmp_regno
= R10_REG
;
34712 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
34713 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
34714 tmp_regno
= AX_REG
;
34716 tmp_regno
= CX_REG
;
34719 emit_note (NOTE_INSN_PROLOGUE_END
);
34721 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
34722 pull it in now and let DELTA benefit. */
34723 if (REG_P (this_param
))
34724 this_reg
= this_param
;
34725 else if (vcall_offset
)
34727 /* Put the this parameter into %eax. */
34728 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
34729 emit_move_insn (this_reg
, this_param
);
34732 this_reg
= NULL_RTX
;
34734 /* Adjust the this parameter by a fixed constant. */
34737 rtx delta_rtx
= GEN_INT (delta
);
34738 rtx delta_dst
= this_reg
? this_reg
: this_param
;
34742 if (!x86_64_general_operand (delta_rtx
, Pmode
))
34744 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34745 emit_move_insn (tmp
, delta_rtx
);
34750 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
34753 /* Adjust the this parameter by a value stored in the vtable. */
34756 rtx vcall_addr
, vcall_mem
, this_mem
;
34758 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34760 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
34761 if (Pmode
!= ptr_mode
)
34762 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
34763 emit_move_insn (tmp
, this_mem
);
34765 /* Adjust the this parameter. */
34766 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
34768 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
34770 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
34771 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
34772 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
34775 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
34776 if (Pmode
!= ptr_mode
)
34777 emit_insn (gen_addsi_1_zext (this_reg
,
34778 gen_rtx_REG (ptr_mode
,
34782 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
34785 /* If necessary, drop THIS back to its stack slot. */
34786 if (this_reg
&& this_reg
!= this_param
)
34787 emit_move_insn (this_param
, this_reg
);
34789 fnaddr
= XEXP (DECL_RTL (function
), 0);
34792 if (!flag_pic
|| targetm
.binds_local_p (function
)
34793 || cfun
->machine
->call_abi
== MS_ABI
)
34797 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
34798 tmp
= gen_rtx_CONST (Pmode
, tmp
);
34799 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
34804 if (!flag_pic
|| targetm
.binds_local_p (function
))
34807 else if (TARGET_MACHO
)
34809 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
34810 fnaddr
= XEXP (fnaddr
, 0);
34812 #endif /* TARGET_MACHO */
34815 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
34816 output_set_got (tmp
, NULL_RTX
);
34818 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
34819 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
34820 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
34824 /* Our sibling call patterns do not allow memories, because we have no
34825 predicate that can distinguish between frame and non-frame memory.
34826 For our purposes here, we can get away with (ab)using a jump pattern,
34827 because we're going to do no optimization. */
34828 if (MEM_P (fnaddr
))
34829 emit_jump_insn (gen_indirect_jump (fnaddr
));
34832 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
34833 fnaddr
= legitimize_pic_address (fnaddr
,
34834 gen_rtx_REG (Pmode
, tmp_regno
));
34836 if (!sibcall_insn_operand (fnaddr
, word_mode
))
34838 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
34839 if (GET_MODE (fnaddr
) != word_mode
)
34840 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
34841 emit_move_insn (tmp
, fnaddr
);
34845 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
34846 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
34847 tmp
= emit_call_insn (tmp
);
34848 SIBLING_CALL_P (tmp
) = 1;
34852 /* Emit just enough of rest_of_compilation to get the insns emitted.
34853 Note that use_thunk calls assemble_start_function et al. */
34854 tmp
= get_insns ();
34855 shorten_branches (tmp
);
34856 final_start_function (tmp
, file
, 1);
34857 final (tmp
, file
, 1);
34858 final_end_function ();
34862 x86_file_start (void)
34864 default_file_start ();
34866 darwin_file_start ();
34868 if (X86_FILE_START_VERSION_DIRECTIVE
)
34869 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
34870 if (X86_FILE_START_FLTUSED
)
34871 fputs ("\t.global\t__fltused\n", asm_out_file
);
34872 if (ix86_asm_dialect
== ASM_INTEL
)
34873 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
34877 x86_field_alignment (tree field
, int computed
)
34879 enum machine_mode mode
;
34880 tree type
= TREE_TYPE (field
);
34882 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
34884 mode
= TYPE_MODE (strip_array_types (type
));
34885 if (mode
== DFmode
|| mode
== DCmode
34886 || GET_MODE_CLASS (mode
) == MODE_INT
34887 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
34888 return MIN (32, computed
);
34892 /* Output assembler code to FILE to increment profiler label # LABELNO
34893 for profiling a function entry. */
34895 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
34897 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
34902 #ifndef NO_PROFILE_COUNTERS
34903 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
34906 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
34907 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
34909 fprintf (file
, "\tcall\t%s\n", mcount_name
);
34913 #ifndef NO_PROFILE_COUNTERS
34914 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
34917 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
34921 #ifndef NO_PROFILE_COUNTERS
34922 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
34925 fprintf (file
, "\tcall\t%s\n", mcount_name
);
34929 /* We don't have exact information about the insn sizes, but we may assume
34930 quite safely that we are informed about all 1 byte insns and memory
34931 address sizes. This is enough to eliminate unnecessary padding in
34935 min_insn_size (rtx insn
)
34939 if (!INSN_P (insn
) || !active_insn_p (insn
))
34942 /* Discard alignments we've emit and jump instructions. */
34943 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
34944 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
34946 if (JUMP_TABLE_DATA_P (insn
))
34949 /* Important case - calls are always 5 bytes.
34950 It is common to have many calls in the row. */
34952 && symbolic_reference_mentioned_p (PATTERN (insn
))
34953 && !SIBLING_CALL_P (insn
))
34955 len
= get_attr_length (insn
);
34959 /* For normal instructions we rely on get_attr_length being exact,
34960 with a few exceptions. */
34961 if (!JUMP_P (insn
))
34963 enum attr_type type
= get_attr_type (insn
);
34968 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
34969 || asm_noperands (PATTERN (insn
)) >= 0)
34976 /* Otherwise trust get_attr_length. */
34980 l
= get_attr_length_address (insn
);
34981 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
34990 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
34992 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
34996 ix86_avoid_jump_mispredicts (void)
34998 rtx insn
, start
= get_insns ();
34999 int nbytes
= 0, njumps
= 0;
35002 /* Look for all minimal intervals of instructions containing 4 jumps.
35003 The intervals are bounded by START and INSN. NBYTES is the total
35004 size of instructions in the interval including INSN and not including
35005 START. When the NBYTES is smaller than 16 bytes, it is possible
35006 that the end of START and INSN ends up in the same 16byte page.
35008 The smallest offset in the page INSN can start is the case where START
35009 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35010 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35012 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35016 if (LABEL_P (insn
))
35018 int align
= label_to_alignment (insn
);
35019 int max_skip
= label_to_max_skip (insn
);
35023 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35024 already in the current 16 byte page, because otherwise
35025 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35026 bytes to reach 16 byte boundary. */
35028 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35031 fprintf (dump_file
, "Label %i with max_skip %i\n",
35032 INSN_UID (insn
), max_skip
);
35035 while (nbytes
+ max_skip
>= 16)
35037 start
= NEXT_INSN (start
);
35038 if ((JUMP_P (start
)
35039 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35040 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35042 njumps
--, isjump
= 1;
35045 nbytes
-= min_insn_size (start
);
35051 min_size
= min_insn_size (insn
);
35052 nbytes
+= min_size
;
35054 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35055 INSN_UID (insn
), min_size
);
35057 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
35058 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
35066 start
= NEXT_INSN (start
);
35067 if ((JUMP_P (start
)
35068 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35069 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35071 njumps
--, isjump
= 1;
35074 nbytes
-= min_insn_size (start
);
35076 gcc_assert (njumps
>= 0);
35078 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
35079 INSN_UID (start
), INSN_UID (insn
), nbytes
);
35081 if (njumps
== 3 && isjump
&& nbytes
< 16)
35083 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
35086 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
35087 INSN_UID (insn
), padsize
);
35088 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
35094 /* AMD Athlon works faster
35095 when RET is not destination of conditional jump or directly preceded
35096 by other jump instruction. We avoid the penalty by inserting NOP just
35097 before the RET instructions in such cases. */
35099 ix86_pad_returns (void)
35104 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35106 basic_block bb
= e
->src
;
35107 rtx ret
= BB_END (bb
);
35109 bool replace
= false;
35111 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35112 || optimize_bb_for_size_p (bb
))
35114 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35115 if (active_insn_p (prev
) || LABEL_P (prev
))
35117 if (prev
&& LABEL_P (prev
))
35122 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35123 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35124 && !(e
->flags
& EDGE_FALLTHRU
))
35129 prev
= prev_active_insn (ret
);
35131 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35134 /* Empty functions get branch mispredict even when
35135 the jump destination is not visible to us. */
35136 if (!prev
&& !optimize_function_for_size_p (cfun
))
35141 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35147 /* Count the minimum number of instructions in BB. Return 4 if the
35148 number of instructions >= 4. */
35151 ix86_count_insn_bb (basic_block bb
)
35154 int insn_count
= 0;
35156 /* Count number of instructions in this block. Return 4 if the number
35157 of instructions >= 4. */
35158 FOR_BB_INSNS (bb
, insn
)
35160 /* Only happen in exit blocks. */
35162 && ANY_RETURN_P (PATTERN (insn
)))
35165 if (NONDEBUG_INSN_P (insn
)
35166 && GET_CODE (PATTERN (insn
)) != USE
35167 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35170 if (insn_count
>= 4)
35179 /* Count the minimum number of instructions in code path in BB.
35180 Return 4 if the number of instructions >= 4. */
35183 ix86_count_insn (basic_block bb
)
35187 int min_prev_count
;
35189 /* Only bother counting instructions along paths with no
35190 more than 2 basic blocks between entry and exit. Given
35191 that BB has an edge to exit, determine if a predecessor
35192 of BB has an edge from entry. If so, compute the number
35193 of instructions in the predecessor block. If there
35194 happen to be multiple such blocks, compute the minimum. */
35195 min_prev_count
= 4;
35196 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35199 edge_iterator prev_ei
;
35201 if (e
->src
== ENTRY_BLOCK_PTR
)
35203 min_prev_count
= 0;
35206 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35208 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35210 int count
= ix86_count_insn_bb (e
->src
);
35211 if (count
< min_prev_count
)
35212 min_prev_count
= count
;
35218 if (min_prev_count
< 4)
35219 min_prev_count
+= ix86_count_insn_bb (bb
);
35221 return min_prev_count
;
35224 /* Pad short function to 4 instructions. */
35227 ix86_pad_short_function (void)
35232 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35234 rtx ret
= BB_END (e
->src
);
35235 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35237 int insn_count
= ix86_count_insn (e
->src
);
35239 /* Pad short function. */
35240 if (insn_count
< 4)
35244 /* Find epilogue. */
35247 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35248 insn
= PREV_INSN (insn
);
35253 /* Two NOPs count as one instruction. */
35254 insn_count
= 2 * (4 - insn_count
);
35255 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
35261 /* Implement machine specific optimizations. We implement padding of returns
35262 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
35266 /* We are freeing block_for_insn in the toplev to keep compatibility
35267 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35268 compute_bb_for_insn ();
35270 if (optimize
&& optimize_function_for_speed_p (cfun
))
35272 if (TARGET_PAD_SHORT_FUNCTION
)
35273 ix86_pad_short_function ();
35274 else if (TARGET_PAD_RETURNS
)
35275 ix86_pad_returns ();
35276 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35277 if (TARGET_FOUR_JUMP_LIMIT
)
35278 ix86_avoid_jump_mispredicts ();
35283 /* Return nonzero when QImode register that must be represented via REX prefix
35286 x86_extended_QIreg_mentioned_p (rtx insn
)
35289 extract_insn_cached (insn
);
35290 for (i
= 0; i
< recog_data
.n_operands
; i
++)
35291 if (GENERAL_REG_P (recog_data
.operand
[i
])
35292 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
35297 /* Return nonzero when P points to register encoded via REX prefix.
35298 Called via for_each_rtx. */
35300 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
35302 unsigned int regno
;
35305 regno
= REGNO (*p
);
35306 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
35309 /* Return true when INSN mentions register that must be encoded using REX
35312 x86_extended_reg_mentioned_p (rtx insn
)
35314 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
35315 extended_reg_mentioned_1
, NULL
);
35318 /* If profitable, negate (without causing overflow) integer constant
35319 of mode MODE at location LOC. Return true in this case. */
35321 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
35325 if (!CONST_INT_P (*loc
))
35331 /* DImode x86_64 constants must fit in 32 bits. */
35332 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
35343 gcc_unreachable ();
35346 /* Avoid overflows. */
35347 if (mode_signbit_p (mode
, *loc
))
35350 val
= INTVAL (*loc
);
35352 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35353 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35354 if ((val
< 0 && val
!= -128)
35357 *loc
= GEN_INT (-val
);
35364 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35365 optabs would emit if we didn't have TFmode patterns. */
35368 x86_emit_floatuns (rtx operands
[2])
35370 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
35371 enum machine_mode mode
, inmode
;
35373 inmode
= GET_MODE (operands
[1]);
35374 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
35377 in
= force_reg (inmode
, operands
[1]);
35378 mode
= GET_MODE (out
);
35379 neglab
= gen_label_rtx ();
35380 donelab
= gen_label_rtx ();
35381 f0
= gen_reg_rtx (mode
);
35383 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
35385 expand_float (out
, in
, 0);
35387 emit_jump_insn (gen_jump (donelab
));
35390 emit_label (neglab
);
35392 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
35394 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
35396 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
35398 expand_float (f0
, i0
, 0);
35400 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
35402 emit_label (donelab
);
35405 /* AVX2 does support 32-byte integer vector operations,
35406 thus the longest vector we are faced with is V32QImode. */
35407 #define MAX_VECT_LEN 32
35409 struct expand_vec_perm_d
35411 rtx target
, op0
, op1
;
35412 unsigned char perm
[MAX_VECT_LEN
];
35413 enum machine_mode vmode
;
35414 unsigned char nelt
;
35415 bool one_operand_p
;
35419 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
35420 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
35421 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
35423 /* Get a vector mode of the same size as the original but with elements
35424 twice as wide. This is only guaranteed to apply to integral vectors. */
35426 static inline enum machine_mode
35427 get_mode_wider_vector (enum machine_mode o
)
35429 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35430 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
35431 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
35432 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
35436 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35437 with all elements equal to VAR. Return true if successful. */
35440 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
35441 rtx target
, rtx val
)
35464 /* First attempt to recognize VAL as-is. */
35465 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35466 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
35467 if (recog_memoized (insn
) < 0)
35470 /* If that fails, force VAL into a register. */
35473 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
35474 seq
= get_insns ();
35477 emit_insn_before (seq
, insn
);
35479 ok
= recog_memoized (insn
) >= 0;
35488 if (TARGET_SSE
|| TARGET_3DNOW_A
)
35492 val
= gen_lowpart (SImode
, val
);
35493 x
= gen_rtx_TRUNCATE (HImode
, val
);
35494 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
35495 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35508 struct expand_vec_perm_d dperm
;
35512 memset (&dperm
, 0, sizeof (dperm
));
35513 dperm
.target
= target
;
35514 dperm
.vmode
= mode
;
35515 dperm
.nelt
= GET_MODE_NUNITS (mode
);
35516 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
35517 dperm
.one_operand_p
= true;
35519 /* Extend to SImode using a paradoxical SUBREG. */
35520 tmp1
= gen_reg_rtx (SImode
);
35521 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
35523 /* Insert the SImode value as low element of a V4SImode vector. */
35524 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
35525 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
35527 ok
= (expand_vec_perm_1 (&dperm
)
35528 || expand_vec_perm_broadcast_1 (&dperm
));
35540 /* Replicate the value once into the next wider mode and recurse. */
35542 enum machine_mode smode
, wsmode
, wvmode
;
35545 smode
= GET_MODE_INNER (mode
);
35546 wvmode
= get_mode_wider_vector (mode
);
35547 wsmode
= GET_MODE_INNER (wvmode
);
35549 val
= convert_modes (wsmode
, smode
, val
, true);
35550 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
35551 GEN_INT (GET_MODE_BITSIZE (smode
)),
35552 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35553 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
35555 x
= gen_lowpart (wvmode
, target
);
35556 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
35564 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
35565 rtx x
= gen_reg_rtx (hvmode
);
35567 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
35570 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
35571 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35580 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35581 whose ONE_VAR element is VAR, and other elements are zero. Return true
35585 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
35586 rtx target
, rtx var
, int one_var
)
35588 enum machine_mode vsimode
;
35591 bool use_vector_set
= false;
35596 /* For SSE4.1, we normally use vector set. But if the second
35597 element is zero and inter-unit moves are OK, we use movq
35599 use_vector_set
= (TARGET_64BIT
35601 && !(TARGET_INTER_UNIT_MOVES
35607 use_vector_set
= TARGET_SSE4_1
;
35610 use_vector_set
= TARGET_SSE2
;
35613 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
35620 use_vector_set
= TARGET_AVX
;
35623 /* Use ix86_expand_vector_set in 64bit mode only. */
35624 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
35630 if (use_vector_set
)
35632 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
35633 var
= force_reg (GET_MODE_INNER (mode
), var
);
35634 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35650 var
= force_reg (GET_MODE_INNER (mode
), var
);
35651 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
35652 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35657 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
35658 new_target
= gen_reg_rtx (mode
);
35660 new_target
= target
;
35661 var
= force_reg (GET_MODE_INNER (mode
), var
);
35662 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
35663 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
35664 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
35667 /* We need to shuffle the value to the correct position, so
35668 create a new pseudo to store the intermediate result. */
35670 /* With SSE2, we can use the integer shuffle insns. */
35671 if (mode
!= V4SFmode
&& TARGET_SSE2
)
35673 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
35675 GEN_INT (one_var
== 1 ? 0 : 1),
35676 GEN_INT (one_var
== 2 ? 0 : 1),
35677 GEN_INT (one_var
== 3 ? 0 : 1)));
35678 if (target
!= new_target
)
35679 emit_move_insn (target
, new_target
);
35683 /* Otherwise convert the intermediate result to V4SFmode and
35684 use the SSE1 shuffle instructions. */
35685 if (mode
!= V4SFmode
)
35687 tmp
= gen_reg_rtx (V4SFmode
);
35688 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
35693 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
35695 GEN_INT (one_var
== 1 ? 0 : 1),
35696 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
35697 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
35699 if (mode
!= V4SFmode
)
35700 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
35701 else if (tmp
!= target
)
35702 emit_move_insn (target
, tmp
);
35704 else if (target
!= new_target
)
35705 emit_move_insn (target
, new_target
);
35710 vsimode
= V4SImode
;
35716 vsimode
= V2SImode
;
35722 /* Zero extend the variable element to SImode and recurse. */
35723 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
35725 x
= gen_reg_rtx (vsimode
);
35726 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
35728 gcc_unreachable ();
35730 emit_move_insn (target
, gen_lowpart (mode
, x
));
35738 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35739 consisting of the values in VALS. It is known that all elements
35740 except ONE_VAR are constants. Return true if successful. */
35743 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
35744 rtx target
, rtx vals
, int one_var
)
35746 rtx var
= XVECEXP (vals
, 0, one_var
);
35747 enum machine_mode wmode
;
35750 const_vec
= copy_rtx (vals
);
35751 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
35752 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
35760 /* For the two element vectors, it's just as easy to use
35761 the general case. */
35765 /* Use ix86_expand_vector_set in 64bit mode only. */
35788 /* There's no way to set one QImode entry easily. Combine
35789 the variable value with its adjacent constant value, and
35790 promote to an HImode set. */
35791 x
= XVECEXP (vals
, 0, one_var
^ 1);
35794 var
= convert_modes (HImode
, QImode
, var
, true);
35795 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
35796 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35797 x
= GEN_INT (INTVAL (x
) & 0xff);
35801 var
= convert_modes (HImode
, QImode
, var
, true);
35802 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
35804 if (x
!= const0_rtx
)
35805 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
35806 1, OPTAB_LIB_WIDEN
);
35808 x
= gen_reg_rtx (wmode
);
35809 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
35810 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
35812 emit_move_insn (target
, gen_lowpart (mode
, x
));
35819 emit_move_insn (target
, const_vec
);
35820 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35824 /* A subroutine of ix86_expand_vector_init_general. Use vector
35825 concatenate to handle the most general case: all values variable,
35826 and none identical. */
35829 ix86_expand_vector_init_concat (enum machine_mode mode
,
35830 rtx target
, rtx
*ops
, int n
)
35832 enum machine_mode cmode
, hmode
= VOIDmode
;
35833 rtx first
[8], second
[4];
35873 gcc_unreachable ();
35876 if (!register_operand (ops
[1], cmode
))
35877 ops
[1] = force_reg (cmode
, ops
[1]);
35878 if (!register_operand (ops
[0], cmode
))
35879 ops
[0] = force_reg (cmode
, ops
[0]);
35880 emit_insn (gen_rtx_SET (VOIDmode
, target
,
35881 gen_rtx_VEC_CONCAT (mode
, ops
[0],
35901 gcc_unreachable ();
35917 gcc_unreachable ();
35922 /* FIXME: We process inputs backward to help RA. PR 36222. */
35925 for (; i
> 0; i
-= 2, j
--)
35927 first
[j
] = gen_reg_rtx (cmode
);
35928 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
35929 ix86_expand_vector_init (false, first
[j
],
35930 gen_rtx_PARALLEL (cmode
, v
));
35936 gcc_assert (hmode
!= VOIDmode
);
35937 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
35939 second
[j
] = gen_reg_rtx (hmode
);
35940 ix86_expand_vector_init_concat (hmode
, second
[j
],
35944 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
35947 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
35951 gcc_unreachable ();
35955 /* A subroutine of ix86_expand_vector_init_general. Use vector
35956 interleave to handle the most general case: all values variable,
35957 and none identical. */
35960 ix86_expand_vector_init_interleave (enum machine_mode mode
,
35961 rtx target
, rtx
*ops
, int n
)
35963 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
35966 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
35967 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
35968 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
35973 gen_load_even
= gen_vec_setv8hi
;
35974 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
35975 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
35976 inner_mode
= HImode
;
35977 first_imode
= V4SImode
;
35978 second_imode
= V2DImode
;
35979 third_imode
= VOIDmode
;
35982 gen_load_even
= gen_vec_setv16qi
;
35983 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
35984 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
35985 inner_mode
= QImode
;
35986 first_imode
= V8HImode
;
35987 second_imode
= V4SImode
;
35988 third_imode
= V2DImode
;
35991 gcc_unreachable ();
35994 for (i
= 0; i
< n
; i
++)
35996 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
35997 op0
= gen_reg_rtx (SImode
);
35998 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36000 /* Insert the SImode value as low element of V4SImode vector. */
36001 op1
= gen_reg_rtx (V4SImode
);
36002 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36003 gen_rtx_VEC_DUPLICATE (V4SImode
,
36005 CONST0_RTX (V4SImode
),
36007 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36009 /* Cast the V4SImode vector back to a vector in orignal mode. */
36010 op0
= gen_reg_rtx (mode
);
36011 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36013 /* Load even elements into the second positon. */
36014 emit_insn (gen_load_even (op0
,
36015 force_reg (inner_mode
,
36019 /* Cast vector to FIRST_IMODE vector. */
36020 ops
[i
] = gen_reg_rtx (first_imode
);
36021 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36024 /* Interleave low FIRST_IMODE vectors. */
36025 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36027 op0
= gen_reg_rtx (first_imode
);
36028 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36030 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36031 ops
[j
] = gen_reg_rtx (second_imode
);
36032 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36035 /* Interleave low SECOND_IMODE vectors. */
36036 switch (second_imode
)
36039 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36041 op0
= gen_reg_rtx (second_imode
);
36042 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36045 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36047 ops
[j
] = gen_reg_rtx (third_imode
);
36048 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36050 second_imode
= V2DImode
;
36051 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36055 op0
= gen_reg_rtx (second_imode
);
36056 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36059 /* Cast the SECOND_IMODE vector back to a vector on original
36061 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36062 gen_lowpart (mode
, op0
)));
36066 gcc_unreachable ();
36070 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36071 all values variable, and none identical. */
36074 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36075 rtx target
, rtx vals
)
36077 rtx ops
[32], op0
, op1
;
36078 enum machine_mode half_mode
= VOIDmode
;
36085 if (!mmx_ok
&& !TARGET_SSE
)
36097 n
= GET_MODE_NUNITS (mode
);
36098 for (i
= 0; i
< n
; i
++)
36099 ops
[i
] = XVECEXP (vals
, 0, i
);
36100 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36104 half_mode
= V16QImode
;
36108 half_mode
= V8HImode
;
36112 n
= GET_MODE_NUNITS (mode
);
36113 for (i
= 0; i
< n
; i
++)
36114 ops
[i
] = XVECEXP (vals
, 0, i
);
36115 op0
= gen_reg_rtx (half_mode
);
36116 op1
= gen_reg_rtx (half_mode
);
36117 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36119 ix86_expand_vector_init_interleave (half_mode
, op1
,
36120 &ops
[n
>> 1], n
>> 2);
36121 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36122 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36126 if (!TARGET_SSE4_1
)
36134 /* Don't use ix86_expand_vector_init_interleave if we can't
36135 move from GPR to SSE register directly. */
36136 if (!TARGET_INTER_UNIT_MOVES
)
36139 n
= GET_MODE_NUNITS (mode
);
36140 for (i
= 0; i
< n
; i
++)
36141 ops
[i
] = XVECEXP (vals
, 0, i
);
36142 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36150 gcc_unreachable ();
36154 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36155 enum machine_mode inner_mode
;
36156 rtx words
[4], shift
;
36158 inner_mode
= GET_MODE_INNER (mode
);
36159 n_elts
= GET_MODE_NUNITS (mode
);
36160 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36161 n_elt_per_word
= n_elts
/ n_words
;
36162 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36164 for (i
= 0; i
< n_words
; ++i
)
36166 rtx word
= NULL_RTX
;
36168 for (j
= 0; j
< n_elt_per_word
; ++j
)
36170 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36171 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36177 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36178 word
, 1, OPTAB_LIB_WIDEN
);
36179 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36180 word
, 1, OPTAB_LIB_WIDEN
);
36188 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36189 else if (n_words
== 2)
36191 rtx tmp
= gen_reg_rtx (mode
);
36192 emit_clobber (tmp
);
36193 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36194 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36195 emit_move_insn (target
, tmp
);
36197 else if (n_words
== 4)
36199 rtx tmp
= gen_reg_rtx (V4SImode
);
36200 gcc_assert (word_mode
== SImode
);
36201 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36202 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36203 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36206 gcc_unreachable ();
36210 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36211 instructions unless MMX_OK is true. */
36214 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36216 enum machine_mode mode
= GET_MODE (target
);
36217 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36218 int n_elts
= GET_MODE_NUNITS (mode
);
36219 int n_var
= 0, one_var
= -1;
36220 bool all_same
= true, all_const_zero
= true;
36224 for (i
= 0; i
< n_elts
; ++i
)
36226 x
= XVECEXP (vals
, 0, i
);
36227 if (!(CONST_INT_P (x
)
36228 || GET_CODE (x
) == CONST_DOUBLE
36229 || GET_CODE (x
) == CONST_FIXED
))
36230 n_var
++, one_var
= i
;
36231 else if (x
!= CONST0_RTX (inner_mode
))
36232 all_const_zero
= false;
36233 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
36237 /* Constants are best loaded from the constant pool. */
36240 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
36244 /* If all values are identical, broadcast the value. */
36246 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
36247 XVECEXP (vals
, 0, 0)))
36250 /* Values where only one field is non-constant are best loaded from
36251 the pool and overwritten via move later. */
36255 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
36256 XVECEXP (vals
, 0, one_var
),
36260 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
36264 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
36268 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
36270 enum machine_mode mode
= GET_MODE (target
);
36271 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36272 enum machine_mode half_mode
;
36273 bool use_vec_merge
= false;
36275 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
36277 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
36278 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
36279 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
36280 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
36281 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
36282 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
36284 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
36286 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
36287 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
36288 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
36289 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
36290 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
36291 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
36301 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36302 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
36304 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36306 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36307 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36313 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
36317 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36318 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
36320 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36322 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36323 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36330 /* For the two element vectors, we implement a VEC_CONCAT with
36331 the extraction of the other element. */
36333 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
36334 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
36337 op0
= val
, op1
= tmp
;
36339 op0
= tmp
, op1
= val
;
36341 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
36342 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36347 use_vec_merge
= TARGET_SSE4_1
;
36354 use_vec_merge
= true;
36358 /* tmp = target = A B C D */
36359 tmp
= copy_to_reg (target
);
36360 /* target = A A B B */
36361 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
36362 /* target = X A B B */
36363 ix86_expand_vector_set (false, target
, val
, 0);
36364 /* target = A X C D */
36365 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36366 const1_rtx
, const0_rtx
,
36367 GEN_INT (2+4), GEN_INT (3+4)));
36371 /* tmp = target = A B C D */
36372 tmp
= copy_to_reg (target
);
36373 /* tmp = X B C D */
36374 ix86_expand_vector_set (false, tmp
, val
, 0);
36375 /* target = A B X D */
36376 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36377 const0_rtx
, const1_rtx
,
36378 GEN_INT (0+4), GEN_INT (3+4)));
36382 /* tmp = target = A B C D */
36383 tmp
= copy_to_reg (target
);
36384 /* tmp = X B C D */
36385 ix86_expand_vector_set (false, tmp
, val
, 0);
36386 /* target = A B X D */
36387 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36388 const0_rtx
, const1_rtx
,
36389 GEN_INT (2+4), GEN_INT (0+4)));
36393 gcc_unreachable ();
36398 use_vec_merge
= TARGET_SSE4_1
;
36402 /* Element 0 handled by vec_merge below. */
36405 use_vec_merge
= true;
36411 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36412 store into element 0, then shuffle them back. */
36416 order
[0] = GEN_INT (elt
);
36417 order
[1] = const1_rtx
;
36418 order
[2] = const2_rtx
;
36419 order
[3] = GEN_INT (3);
36420 order
[elt
] = const0_rtx
;
36422 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36423 order
[1], order
[2], order
[3]));
36425 ix86_expand_vector_set (false, target
, val
, 0);
36427 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36428 order
[1], order
[2], order
[3]));
36432 /* For SSE1, we have to reuse the V4SF code. */
36433 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
36434 gen_lowpart (SFmode
, val
), elt
);
36439 use_vec_merge
= TARGET_SSE2
;
36442 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36446 use_vec_merge
= TARGET_SSE4_1
;
36453 half_mode
= V16QImode
;
36459 half_mode
= V8HImode
;
36465 half_mode
= V4SImode
;
36471 half_mode
= V2DImode
;
36477 half_mode
= V4SFmode
;
36483 half_mode
= V2DFmode
;
36489 /* Compute offset. */
36493 gcc_assert (i
<= 1);
36495 /* Extract the half. */
36496 tmp
= gen_reg_rtx (half_mode
);
36497 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
36499 /* Put val in tmp at elt. */
36500 ix86_expand_vector_set (false, tmp
, val
, elt
);
36503 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
36512 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36513 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
36514 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36518 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36520 emit_move_insn (mem
, target
);
36522 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36523 emit_move_insn (tmp
, val
);
36525 emit_move_insn (target
, mem
);
36530 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
36532 enum machine_mode mode
= GET_MODE (vec
);
36533 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36534 bool use_vec_extr
= false;
36547 use_vec_extr
= true;
36551 use_vec_extr
= TARGET_SSE4_1
;
36563 tmp
= gen_reg_rtx (mode
);
36564 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
36565 GEN_INT (elt
), GEN_INT (elt
),
36566 GEN_INT (elt
+4), GEN_INT (elt
+4)));
36570 tmp
= gen_reg_rtx (mode
);
36571 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
36575 gcc_unreachable ();
36578 use_vec_extr
= true;
36583 use_vec_extr
= TARGET_SSE4_1
;
36597 tmp
= gen_reg_rtx (mode
);
36598 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
36599 GEN_INT (elt
), GEN_INT (elt
),
36600 GEN_INT (elt
), GEN_INT (elt
)));
36604 tmp
= gen_reg_rtx (mode
);
36605 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
36609 gcc_unreachable ();
36612 use_vec_extr
= true;
36617 /* For SSE1, we have to reuse the V4SF code. */
36618 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
36619 gen_lowpart (V4SFmode
, vec
), elt
);
36625 use_vec_extr
= TARGET_SSE2
;
36628 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36632 use_vec_extr
= TARGET_SSE4_1
;
36638 tmp
= gen_reg_rtx (V4SFmode
);
36640 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
36642 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
36643 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36651 tmp
= gen_reg_rtx (V2DFmode
);
36653 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
36655 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
36656 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36664 tmp
= gen_reg_rtx (V16QImode
);
36666 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
36668 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
36669 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
36677 tmp
= gen_reg_rtx (V8HImode
);
36679 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
36681 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
36682 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
36690 tmp
= gen_reg_rtx (V4SImode
);
36692 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
36694 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
36695 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36703 tmp
= gen_reg_rtx (V2DImode
);
36705 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
36707 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
36708 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36714 /* ??? Could extract the appropriate HImode element and shift. */
36721 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
36722 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
36724 /* Let the rtl optimizers know about the zero extension performed. */
36725 if (inner_mode
== QImode
|| inner_mode
== HImode
)
36727 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
36728 target
= gen_lowpart (SImode
, target
);
36731 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36735 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36737 emit_move_insn (mem
, vec
);
36739 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36740 emit_move_insn (target
, tmp
);
36744 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
36745 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
36746 The upper bits of DEST are undefined, though they shouldn't cause
36747 exceptions (some bits from src or all zeros are ok). */
36750 emit_reduc_half (rtx dest
, rtx src
, int i
)
36753 switch (GET_MODE (src
))
36757 tem
= gen_sse_movhlps (dest
, src
, src
);
36759 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
36760 GEN_INT (1 + 4), GEN_INT (1 + 4));
36763 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
36769 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
36770 gen_lowpart (V1TImode
, src
),
36775 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
36777 tem
= gen_avx_shufps256 (dest
, src
, src
,
36778 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
36782 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
36784 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
36791 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
36792 gen_lowpart (V4DImode
, src
),
36793 gen_lowpart (V4DImode
, src
),
36796 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
36797 gen_lowpart (V2TImode
, src
),
36801 gcc_unreachable ();
36806 /* Expand a vector reduction. FN is the binary pattern to reduce;
36807 DEST is the destination; IN is the input vector. */
/* Expand a full vector reduction: repeatedly halve the active width with
   emit_reduc_half and combine halves with FN until one element remains.
   FN is the binary insn generator; DEST receives the result; IN is the
   input vector.
   NOTE(review): extraction-garbled — loop braces, loop step, and the
   TARGET_SSE4_1 condition line are missing; text preserved verbatim.  */
36810 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
36812 rtx half
, dst
, vec
= in
;
36813 enum machine_mode mode
= GET_MODE (in
);
/* Fast path: SSE4.1 phminposuw handles V8HI UMIN reduction directly.  */
36816 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
36818 && mode
== V8HImode
36819 && fn
== gen_uminv8hi3
)
36821 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
/* General case: halve from full vector width down to element width.  */
36825 for (i
= GET_MODE_BITSIZE (mode
);
36826 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
36829 half
= gen_reg_rtx (mode
);
36830 emit_reduc_half (half
, vec
, i
);
/* On the last step the combine writes DEST; otherwise a fresh temp.  */
36831 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
36834 dst
= gen_reg_rtx (mode
);
36835 emit_insn (fn (dst
, half
, vec
));
36840 /* Target hook for scalar_mode_supported_p. */
/* Target hook: report whether scalar MODE is supported.  Decimal float
   modes defer to the generic decimal-float query; everything else falls
   back to the default hook.
   NOTE(review): extraction-garbled — the TFmode branch's return statement
   is missing from the text.  Preserved verbatim.  */
36842 ix86_scalar_mode_supported_p (enum machine_mode mode
)
36844 if (DECIMAL_FLOAT_MODE_P (mode
))
36845 return default_decimal_float_supported_p ();
36846 else if (mode
== TFmode
)
36849 return default_scalar_mode_supported_p (mode
);
36852 /* Implements target hook vector_mode_supported_p. */
/* Target hook: report whether vector MODE is supported, checking each ISA
   level (SSE, SSE2, AVX, MMX, 3DNow!) against its valid-mode predicate.
   NOTE(review): extraction-garbled — the per-branch return statements are
   missing from the text.  Preserved verbatim.  */
36854 ix86_vector_mode_supported_p (enum machine_mode mode
)
36856 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
36858 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
36860 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
36862 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
36864 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
36869 /* Target hook for c_mode_for_suffix. */
/* Target hook for c_mode_for_suffix: map a numeric-literal suffix
   character to a machine mode.
   NOTE(review): the entire function body is missing from this extraction;
   only the signature survives.  Restore from upstream i386.c.  */
36870 static enum machine_mode
36871 ix86_c_mode_for_suffix (char suffix
)
36881 /* Worker function for TARGET_MD_ASM_CLOBBERS.
36883 We do this in the new i386 backend to maintain source compatibility
36884 with the old cc0-based compiler. */
/* Worker for TARGET_MD_ASM_CLOBBERS: prepend "flags" and "fpsr" to the
   clobber list of every asm statement, for cc0-compiler compatibility.
   NOTE(review): extraction-garbled — the third parameter, the tail of
   each tree_cons call, and the return are missing.  Preserved verbatim.  */
36887 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
36888 tree inputs ATTRIBUTE_UNUSED
,
36891 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
36893 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
36898 /* Implements target vector targetm.asm.encode_section_info. */
/* targetm.encode_section_info implementation: after the default encoding,
   mark static/external VAR_DECLs that live in the large data section with
   SYMBOL_FLAG_FAR_ADDR so addressing code knows they need far addresses.
   NOTE(review): extraction-garbled text preserved verbatim.  */
36900 static void ATTRIBUTE_UNUSED
36901 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
36903 default_encode_section_info (decl
, rtl
, first
);
36905 if (TREE_CODE (decl
) == VAR_DECL
36906 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
36907 && ix86_in_large_data_p (decl
))
36908 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
36911 /* Worker function for REVERSE_CONDITION. */
/* Worker for REVERSE_CONDITION: invert CODE, using the
   maybe-unordered variant for x87/SSE FP compare modes where plain
   reversal would mishandle NaNs.
   NOTE(review): extraction-garbled text preserved verbatim.  */
36914 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
36916 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
36917 ? reverse_condition (code
)
36918 : reverse_condition_maybe_unordered (code
));
36921 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Emit the assembler template for an x87 register move from OPERANDS[1]
   to OPERANDS[0], choosing popping (fstp) vs non-popping (fst/fld) forms
   based on whether the source register dies in INSN.
   NOTE(review): extraction-garbled — some braces and the reg-reg
   fallthrough return are missing.  Preserved verbatim.  */
36925 output_387_reg_move (rtx insn
, rtx
*operands
)
36927 if (REG_P (operands
[0]))
/* Source reg dies: pop it off the stack.  */
36929 if (REG_P (operands
[1])
36930 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
36932 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
36933 return output_387_ffreep (operands
, 0);
36934 return "fstp\t%y0";
36936 if (STACK_TOP_P (operands
[0]))
36937 return "fld%Z1\t%y1";
36940 else if (MEM_P (operands
[0]))
36942 gcc_assert (REG_P (operands
[1]));
36943 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
36944 return "fstp%Z0\t%y0";
36947 /* There is no non-popping store to memory for XFmode.
36948 So if we need one, follow the store with a load. */
36949 if (GET_MODE (operands
[0]) == XFmode
)
36950 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
36952 return "fst%Z0\t%y0";
36959 /* Output code to perform a conditional jump to LABEL, if C2 flag in
36960 FP status register is set. */
/* Emit a conditional jump to LABEL taken when the x87 status word's C2
   flag is set: fnstsw into a reg, then either sahf + jump-on-unordered or
   a testb of bit 0x04 + jne, depending on SAHF availability.
   NOTE(review): extraction-garbled — else-branch braces and the pc_rtx
   arm of the IF_THEN_ELSE are missing.  Preserved verbatim.  */
36963 ix86_emit_fp_unordered_jump (rtx label
)
36965 rtx reg
= gen_reg_rtx (HImode
);
36968 emit_insn (gen_x86_fnstsw_1 (reg
));
36970 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
36972 emit_insn (gen_x86_sahf_1 (reg
));
36974 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
36975 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
36979 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
36981 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
36982 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
36985 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
36986 gen_rtx_LABEL_REF (VOIDmode
, label
),
36988 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
36990 emit_jump_insn (temp
);
/* Branch predicted mostly not-taken (10%).  */
36991 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
36994 /* Output code to perform a log1p XFmode calculation. */
/* Emit x87 code for log1p(OP1) into OP0: use fyl2xp1 when |op1| is below
   ~0.2929 (= 1 - sqrt(2)/2, where fyl2xp1 is accurate), otherwise compute
   fyl2x(1 + op1) via the label1 path.
   NOTE(review): extraction-garbled text preserved verbatim.  */
36996 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
36998 rtx label1
= gen_label_rtx ();
36999 rtx label2
= gen_label_rtx ();
37001 rtx tmp
= gen_reg_rtx (XFmode
);
37002 rtx tmp2
= gen_reg_rtx (XFmode
);
/* Branch to label1 when |op1| >= 1 - sqrt(2)/2.  */
37005 emit_insn (gen_absxf2 (tmp
, op1
));
37006 test
= gen_rtx_GE (VOIDmode
, tmp
,
37007 CONST_DOUBLE_FROM_REAL_VALUE (
37008 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37010 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
))
;
/* Accurate small-argument path: op0 = ln2 * log2(1 + op1).  */
37012 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37013 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37014 emit_jump (label2
);
/* Large-argument path: op0 = ln2 * log2(op1 + 1.0).  */
37016 emit_label (label1
);
37017 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37018 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37019 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37020 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37022 emit_label (label2
);
37025 /* Emit code for round calculation. */
/* Emit x87 code for round(OP1) into OP0 as
   sgn(op1) * floor(fabs(op1) + 0.5): fxam captures the sign, frndint/
   lfloor does the floor, and a predicted-50% conditional negation
   restores the sign.  INMODE/OUTMODE select the abs/neg generators.
   NOTE(review): extraction-garbled — both switch headers and their case
   labels, several braces, and the UNSPEC_FXAM tail are missing.
   Preserved verbatim; restore from upstream i386.c.  */
37026 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37028 enum machine_mode inmode
= GET_MODE (op1
);
37029 enum machine_mode outmode
= GET_MODE (op0
);
37030 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37031 rtx scratch
= gen_reg_rtx (HImode
);
37032 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37033 rtx jump_label
= gen_label_rtx ();
37035 rtx (*gen_abs
) (rtx
, rtx
);
37036 rtx (*gen_neg
) (rtx
, rtx
);
/* Select abs generator by INMODE (SF/DF/XF).  */
37041 gen_abs
= gen_abssf2
;
37044 gen_abs
= gen_absdf2
;
37047 gen_abs
= gen_absxf2
;
37050 gcc_unreachable ();
/* Select neg generator by OUTMODE (SF/DF/XF/HI/SI/DI).  */
37056 gen_neg
= gen_negsf2
;
37059 gen_neg
= gen_negdf2
;
37062 gen_neg
= gen_negxf2
;
37065 gen_neg
= gen_neghi2
;
37068 gen_neg
= gen_negsi2
;
37071 gen_neg
= gen_negdi2
;
37074 gcc_unreachable ();
37077 e1
= gen_reg_rtx (inmode
);
37078 e2
= gen_reg_rtx (inmode
);
37079 res
= gen_reg_rtx (outmode
);
37081 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37083 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37085 /* scratch = fxam(op1) */
37086 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37087 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37089 /* e1 = fabs(op1) */
37090 emit_insn (gen_abs (e1
, op1
));
37092 /* e2 = e1 + 0.5 */
37093 half
= force_reg (inmode
, half
);
37094 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37095 gen_rtx_PLUS (inmode
, e1
, half
)));
37097 /* res = floor(e2) */
37098 if (inmode
!= XFmode
)
37100 tmp1
= gen_reg_rtx (XFmode
);
37102 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37103 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
/* FP OUTMODE: frndint then a no-op truncation back to OUTMODE.  */
37113 rtx tmp0
= gen_reg_rtx (XFmode
);
37115 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37117 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37118 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37119 UNSPEC_TRUNC_NOOP
)));
37123 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
/* Integer OUTMODE: lfloor directly into the integer register.  */
37126 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37129 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37132 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37135 gcc_unreachable ();
37138 /* flags = signbit(a) */
37139 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37141 /* if (flags) then res = -res */
37142 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37143 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37144 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37146 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37147 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37148 JUMP_LABEL (insn
) = jump_label
;
37150 emit_insn (gen_neg (res
, res
));
37152 emit_label (jump_label
);
37153 LABEL_NUSES (jump_label
) = 1;
37155 emit_move_insn (op0
, res
);
37158 /* Output code to perform a Newton-Raphson approximation of a single precision
37159 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Emit a Newton-Raphson software approximation of A / B into RES using
   the hardware reciprocal estimate:
   a/b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))).
   NOTE(review): extraction-garbled — the UNSPEC_RCP tail of the first SET
   is missing.  Preserved verbatim.  */
37161 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37163 rtx x0
, x1
, e0
, e1
;
37165 x0
= gen_reg_rtx (mode
);
37166 e0
= gen_reg_rtx (mode
);
37167 e1
= gen_reg_rtx (mode
);
37168 x1
= gen_reg_rtx (mode
);
37170 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37172 b
= force_reg (mode
, b
);
37174 /* x0 = rcp(b) estimate */
37175 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37176 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
/* e0 = x0 * b, then e0 = x0 * e0 (= b * rcp(b)^2).  */
37179 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37180 gen_rtx_MULT (mode
, x0
, b
)));
37183 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37184 gen_rtx_MULT (mode
, x0
, e0
)));
/* e1 = x0 + x0, x1 = e1 - e0 (the refined reciprocal).  */
37187 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37188 gen_rtx_PLUS (mode
, x0
, x0
)));
37191 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37192 gen_rtx_MINUS (mode
, e1
, e0
)));
/* res = a * x1.  */
37195 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37196 gen_rtx_MULT (mode
, a
, x1
)));
37199 /* Output code to perform a Newton-Raphson approximation of a
37200 single precision floating point [reciprocal] square root. */
/* Emit a Newton-Raphson approximation of sqrt(A) (or rsqrt(A)) into RES
   from the hardware rsqrt estimate:
   sqrt(a)  = -0.5 * a * rsqrt(a) * (a * rsqrt(a)^2 - 3.0)
   rsqrt(a) = -0.5 *     rsqrt(a) * (a * rsqrt(a)^2 - 3.0)
   A zero-mask step filters out the infinity rsqrt(0) would inject.
   NOTE(review): extraction-garbled — the recip/sqrt selector parameter,
   the UNSPEC_RSQRT tail, and the branch choosing between the two e3
   assignments are missing.  Preserved verbatim.  */
37202 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37205 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37208 x0
= gen_reg_rtx (mode
);
37209 e0
= gen_reg_rtx (mode
);
37210 e1
= gen_reg_rtx (mode
);
37211 e2
= gen_reg_rtx (mode
);
37212 e3
= gen_reg_rtx (mode
);
/* Build the constants -3.0 and -0.5 (broadcast for vector modes).  */
37214 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37215 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37217 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37218 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37220 if (VECTOR_MODE_P (mode
))
37222 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37223 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37226 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37227 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37229 a
= force_reg (mode
, a
);
37231 /* x0 = rsqrt(a) estimate */
37232 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37233 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
37236 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
37241 zero
= gen_reg_rtx (mode
)
;
37242 mask
= gen_reg_rtx (mode
);
37244 zero
= force_reg (mode
, CONST0_RTX(mode
));
37245 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37246 gen_rtx_NE (mode
, zero
, a
)));
37248 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37249 gen_rtx_AND (mode
, x0
, mask
)));
/* e0 = x0 * a; e1 = e0 * x0; e2 = e1 - 3.0.  */
37253 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37254 gen_rtx_MULT (mode
, x0
, a
)));
37256 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37257 gen_rtx_MULT (mode
, e0
, x0
)));
37260 mthree
= force_reg (mode
, mthree
);
37261 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37262 gen_rtx_PLUS (mode
, e1
, mthree
)));
37264 mhalf
= force_reg (mode
, mhalf
);
37266 /* e3 = -.5 * x0 */
37267 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37268 gen_rtx_MULT (mode
, x0
, mhalf
)));
37270 /* e3 = -.5 * e0 */
37271 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37272 gen_rtx_MULT (mode
, e0
, mhalf
)));
37273 /* ret = e2 * e3 */
37274 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37275 gen_rtx_MULT (mode
, e2
, e3
)));
37278 #ifdef TARGET_SOLARIS
37279 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Solaris TARGET_ASM_NAMED_SECTION: emit ".eh_frame" with the "@unwind"
   marker Binutils 2.15 requires, use comdat sections when available for
   SECTION_LINKONCE, and otherwise defer to the default ELF handler.
   NOTE(review): extraction-garbled — the decl parameter, the Binutils
   condition line, and early returns are missing.  Preserved verbatim.  */
37282 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
37285 /* With Binutils 2.15, the "@unwind" marker must be specified on
37286 every occurrence of the ".eh_frame" section, not just the first
37289 && strcmp (name
, ".eh_frame") == 0)
37291 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
37292 flags
& SECTION_WRITE
? "aw" : "a");
37297 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
37299 solaris_elf_asm_comdat_section (name
, flags
, decl
);
37304 default_elf_asm_named_section (name
, flags
, decl
);
37306 #endif /* TARGET_SOLARIS */
37308 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Mangle extended fundamental types: __float128 mangles as "g" and
   long double/__float80 as "e"; other types return NULL to use the
   default mangling.
   NOTE(review): extraction-garbled — the NULL return, the switch case
   labels, and the return strings are missing.  Preserved verbatim.  */
37310 static const char *
37311 ix86_mangle_type (const_tree type
)
37313 type
= TYPE_MAIN_VARIANT (type
);
37315 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
37316 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
37319 switch (TYPE_MODE (type
))
37322 /* __float128 is "g". */
37325 /* "long double" or __float80 is "e". */
37332 /* For 32-bit code we can save PIC register setup by using
37333 __stack_chk_fail_local hidden function instead of calling
37334 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
37335 register, so it is better to call __stack_chk_fail directly. */
/* TARGET_STACK_PROTECT_FAIL: 64-bit calls __stack_chk_fail directly
   (no PIC setup needed); 32-bit uses the hidden local variant to avoid
   setting up the PIC register.  */
37337 static tree ATTRIBUTE_UNUSED
37338 ix86_stack_protect_fail (void)
37340 return TARGET_64BIT
37341 ? default_external_stack_protect_fail ()
37342 : default_hidden_stack_protect_fail ();
37345 /* Select a format to encode pointers in exception handling data. CODE
37346 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37347 true if the symbol may be affected by dynamic relocations.
37349 ??? All x86 object file formats are capable of representing this.
37350 After all, the relocation needed is the same as for the call insn.
37351 Whether or not a particular assembler allows us to enter such, I
37352 guess we'll have to see. */
/* Choose the DWARF EH pointer encoding: PIC code gets pc-relative
   (sdata4/sdata8, indirect for global symbols); non-PIC small/medium
   models get udata4; otherwise absolute pointers.
   NOTE(review): extraction-garbled — the flag_pic test and surrounding
   braces are missing.  Preserved verbatim.  */
37354 asm_preferred_eh_data_format (int code
, int global
)
37358 int type
= DW_EH_PE_sdata8
;
37360 || ix86_cmodel
== CM_SMALL_PIC
37361 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
37362 type
= DW_EH_PE_sdata4
;
37363 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
37365 if (ix86_cmodel
== CM_SMALL
37366 || (ix86_cmodel
== CM_MEDIUM
&& code
))
37367 return DW_EH_PE_udata4
;
37368 return DW_EH_PE_absptr
;
37371 /* Expand copysign from SIGN to the positive value ABS_VALUE
37372 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = copysign (ABS_VALUE, SIGN) for a non-negative ABS_VALUE:
   isolate SIGN's sign bit via ~MASK & sign and OR it into ABS_VALUE.
   When MASK is null a signbit mask is built (scalar modes extract lane 0
   of the vector mask).
   NOTE(review): extraction-garbled — vmode assignments and some braces
   are missing.  Preserved verbatim.  */
37375 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
37377 enum machine_mode mode
= GET_MODE (sign
);
37378 rtx sgn
= gen_reg_rtx (mode
);
37379 if (mask
== NULL_RTX
)
37381 enum machine_mode vmode
;
37383 if (mode
== SFmode
)
37385 else if (mode
== DFmode
)
37390 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
37391 if (!VECTOR_MODE_P (mode
))
37393 /* We need to generate a scalar mode mask in this case. */
37394 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37395 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37396 mask
= gen_reg_rtx (mode
);
37397 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* sgn = ~mask & sign; result = abs_value | sgn.  */
37401 mask
= gen_rtx_NOT (mode
, mask
);
37402 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
37403 gen_rtx_AND (mode
, mask
, sign
)));
37404 emit_insn (gen_rtx_SET (VOIDmode
, result
,
37405 gen_rtx_IOR (mode
, abs_value
, sgn
)));
37408 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37409 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Compute fabs (OP0) by ANDing with an inverted-signbit mask; the mask
   used is stored through *SMASK for later copysign use.  Returns the
   register holding |OP0|.
   NOTE(review): extraction-garbled — vmode assignments, the *smask
   store, and the return are missing.  Preserved verbatim.  */
37412 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
37414 enum machine_mode vmode
, mode
= GET_MODE (op0
);
37417 xa
= gen_reg_rtx (mode
);
37418 if (mode
== SFmode
)
37420 else if (mode
== DFmode
)
37424 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
37425 if (!VECTOR_MODE_P (mode
))
37427 /* We need to generate a scalar mode mask in this case. */
37428 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37429 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37430 mask
= gen_reg_rtx (mode
);
37431 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37433 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
37434 gen_rtx_AND (mode
, op0
, mask
)));
37442 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37443 swapping the operands if SWAP_OPERANDS is true. The expanded
37444 code is a forward jump to a newly created label in case the
37445 comparison is true. The generated label rtx is returned. */
/* Compare OP0 with OP1 (CODE; operands swapped when SWAP_OPERANDS) using
   an unordered FP compare, and emit a forward jump to a fresh label taken
   when the comparison holds.  Returns the label.
   NOTE(review): extraction-garbled — locals, the swap code, and the
   return are missing.  Preserved verbatim.  */
37447 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
37448 bool swap_operands
)
37459 label
= gen_label_rtx ();
37460 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
37461 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37462 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
37463 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
37464 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
37465 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
37466 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37467 JUMP_LABEL (tmp
) = label
;
37472 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37473 using comparison code CODE. Operands are swapped for the comparison if
37474 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emit a mask-generating SSE compare (cmpss/cmpsd setcc pattern) of OP0
   vs OP1 with CODE, swapping operands when SWAP_OPERANDS.  Returns the
   all-ones/all-zeros mask register.
   NOTE(review): extraction-garbled — the swap code and the return are
   missing.  Preserved verbatim.  */
37476 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
37477 bool swap_operands
)
37479 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
37480 enum machine_mode mode
= GET_MODE (op0
);
37481 rtx mask
= gen_reg_rtx (mode
);
37490 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
37492 emit_insn (insn (mask
, op0
, op1
,
37493 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
37497 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37498 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* Build 2^52 (DFmode) or 2^23 (SFmode) — the power of two past which
   every representable value is already an integer — as a register rtx.
   NOTE(review): extraction-garbled — the TWO52 local declaration and the
   return are missing.  Preserved verbatim.  */
37500 ix86_gen_TWO52 (enum machine_mode mode
)
37502 REAL_VALUE_TYPE TWO52r
;
37505 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
37506 TWO52
= const_double_from_real_value (TWO52r
, mode
);
37507 TWO52
= force_reg (mode
, TWO52
);
37512 /* Expand SSE sequence for computing lround from OP1 storing
/* Expand lround(OP1) into integer OP0:
   op0 = (long)(op1 + copysign (nextafter (0.5, 0.0), op1)).
   nextafter(0.5, 0) is used so exact halfway cases round away from zero
   after the truncating conversion.
   NOTE(review): extraction-garbled — the adj declaration is missing.
   Preserved verbatim.  */
37515 ix86_expand_lround (rtx op0
, rtx op1
)
37517 /* C code for the stuff we're doing below:
37518 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37521 enum machine_mode mode
= GET_MODE (op1
);
37522 const struct real_format
*fmt
;
37523 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37526 /* load nextafter (0.5, 0.0) */
37527 fmt
= REAL_MODE_FORMAT (mode
);
37528 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37529 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37531 /* adj = copysign (0.5, op1) */
37532 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37533 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
37535 /* adj = op1 + adj */
37536 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37538 /* op0 = (imode)adj */
37539 expand_fix (op0
, adj
, 0);
37542 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* Expand lfloor (DO_FLOOR) or lceil (!DO_FLOOR) of OP1 into integer OP0:
   truncate to integer, convert back, and adjust by +-1 when the
   round-trip moved past OP1 in the wrong direction.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37545 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
37547 /* C code for the stuff we're doing below (for do_floor):
37549 xi -= (double)xi > op1 ? 1 : 0;
37552 enum machine_mode fmode
= GET_MODE (op1
);
37553 enum machine_mode imode
= GET_MODE (op0
);
37554 rtx ireg
, freg
, label
, tmp
;
37556 /* reg = (long)op1 */
37557 ireg
= gen_reg_rtx (imode
);
37558 expand_fix (ireg
, op1
, 0);
37560 /* freg = (double)reg */
37561 freg
= gen_reg_rtx (fmode
);
37562 expand_float (freg
, ireg
, 0);
37564 /* ireg = (freg > op1) ? ireg - 1 : ireg */
37565 label
= ix86_expand_sse_compare_and_jump (UNLE
,
37566 freg
, op1
, !do_floor
);
37567 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
37568 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
37569 emit_move_insn (ireg
, tmp
);
37571 emit_label (label
);
37572 LABEL_NUSES (label
) = 1;
37574 emit_move_insn (op0
, ireg
);
37577 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
37578 result in OPERAND0. */
/* Expand rint(OPERAND1) into OPERAND0 using the add-and-subtract-2^52
   trick on |x|, then copy the sign back; values with |x| >= 2^52 are
   already integers and skip to the label.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37580 ix86_expand_rint (rtx operand0
, rtx operand1
)
37582 /* C code for the stuff we're doing below:
37583 xa = fabs (operand1);
37584 if (!isless (xa, 2**52))
37586 xa = xa + 2**52 - 2**52;
37587 return copysign (xa, operand1);
37589 enum machine_mode mode
= GET_MODE (operand0
);
37590 rtx res
, xa
, label
, TWO52
, mask
;
37592 res
= gen_reg_rtx (mode
);
37593 emit_move_insn (res
, operand1
);
37595 /* xa = abs (operand1) */
37596 xa
= ix86_expand_sse_fabs (res
, &mask
);
37598 /* if (!isless (xa, TWO52)) goto label; */
37599 TWO52
= ix86_gen_TWO52 (mode
);
37600 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37602 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37603 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37605 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
37607 emit_label (label
);
37608 LABEL_NUSES (label
) = 1;
37610 emit_move_insn (operand0
, res
);
37613 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor (DO_FLOOR) or ceil (!DO_FLOOR) of OPERAND1 into OPERAND0
   without relying on a 64-bit fix-trunc insn: round |x| via the 2^52
   trick, restore the sign, then subtract a masked +-1 compensation.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37616 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
37618 /* C code for the stuff we expand below.
37619 double xa = fabs (x), x2;
37620 if (!isless (xa, TWO52))
37622 xa = xa + TWO52 - TWO52;
37623 x2 = copysign (xa, x);
37632 enum machine_mode mode
= GET_MODE (operand0
);
37633 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
37635 TWO52
= ix86_gen_TWO52 (mode
);
37637 /* Temporary for holding the result, initialized to the input
37638 operand to ease control flow. */
37639 res
= gen_reg_rtx (mode
);
37640 emit_move_insn (res
, operand1
);
37642 /* xa = abs (operand1) */
37643 xa
= ix86_expand_sse_fabs (res
, &mask
);
37645 /* if (!isless (xa, TWO52)) goto label; */
37646 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37648 /* xa = xa + TWO52 - TWO52; */
37649 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37650 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37652 /* xa = copysign (xa, operand1) */
37653 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
37655 /* generate 1.0 or -1.0 */
37656 one
= force_reg (mode
,
37657 const_double_from_real_value (do_floor
37658 ? dconst1
: dconstm1
, mode
));
37660 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37661 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37662 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37663 gen_rtx_AND (mode
, one
, tmp
)));
37664 /* We always need to subtract here to preserve signed zero. */
37665 tmp
= expand_simple_binop (mode
, MINUS
,
37666 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37667 emit_move_insn (res
, tmp
);
37669 emit_label (label
);
37670 LABEL_NUSES (label
) = 1;
37672 emit_move_insn (operand0
, res
);
37675 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor (DO_FLOOR) or ceil (!DO_FLOOR) of OPERAND1 into OPERAND0
   via truncating integer conversion plus a masked +-1 compensation;
   copies the sign back only when signed zeros must be honored.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37678 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
37680 /* C code for the stuff we expand below.
37681 double xa = fabs (x), x2;
37682 if (!isless (xa, TWO52))
37684 x2 = (double)(long)x;
37691 if (HONOR_SIGNED_ZEROS (mode))
37692 return copysign (x2, x);
37695 enum machine_mode mode
= GET_MODE (operand0
);
37696 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
37698 TWO52
= ix86_gen_TWO52 (mode
);
37700 /* Temporary for holding the result, initialized to the input
37701 operand to ease control flow. */
37702 res
= gen_reg_rtx (mode
);
37703 emit_move_insn (res
, operand1
);
37705 /* xa = abs (operand1) */
37706 xa
= ix86_expand_sse_fabs (res
, &mask
);
37708 /* if (!isless (xa, TWO52)) goto label; */
37709 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37711 /* xa = (double)(long)x */
37712 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37713 expand_fix (xi
, res
, 0);
37714 expand_float (xa
, xi
, 0);
37717 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37719 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37720 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37721 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37722 gen_rtx_AND (mode
, one
, tmp
)));
37723 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
37724 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37725 emit_move_insn (res
, tmp
);
37727 if (HONOR_SIGNED_ZEROS (mode
))
37728 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37730 emit_label (label
);
37731 LABEL_NUSES (label
) = 1;
37733 emit_move_insn (operand0
, res
);
37736 /* Expand SSE sequence for computing round from OPERAND1 storing
37737 into OPERAND0. Sequence that works without relying on DImode truncation
37738 via cvttsd2siq that is only available on 64bit targets. */
/* Expand round(OPERAND1) into OPERAND0 without DImode cvttsd2siq
   (i.e. on 32-bit): nearest-int |x| via the 2^52 trick, then correct the
   halfway direction with masked +-1 adjustments driven by dxa = xa2 - xa,
   and finally copy the sign back.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37740 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
37742 /* C code for the stuff we expand below.
37743 double xa = fabs (x), xa2, x2;
37744 if (!isless (xa, TWO52))
37746 Using the absolute value and copying back sign makes
37747 -0.0 -> -0.0 correct.
37748 xa2 = xa + TWO52 - TWO52;
37753 else if (dxa > 0.5)
37755 x2 = copysign (xa2, x);
37758 enum machine_mode mode
= GET_MODE (operand0
);
37759 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
37761 TWO52
= ix86_gen_TWO52 (mode
);
37763 /* Temporary for holding the result, initialized to the input
37764 operand to ease control flow. */
37765 res
= gen_reg_rtx (mode
);
37766 emit_move_insn (res
, operand1
);
37768 /* xa = abs (operand1) */
37769 xa
= ix86_expand_sse_fabs (res
, &mask
);
37771 /* if (!isless (xa, TWO52)) goto label; */
37772 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37774 /* xa2 = xa + TWO52 - TWO52; */
37775 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37776 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
37778 /* dxa = xa2 - xa; */
37779 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
37781 /* generate 0.5, 1.0 and -0.5 */
37782 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
37783 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37784 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
37788 tmp
= gen_reg_rtx (mode
);
37789 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
37790 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
37791 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37792 gen_rtx_AND (mode
, one
, tmp
)));
37793 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37794 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
37795 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
37796 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37797 gen_rtx_AND (mode
, one
, tmp
)));
37798 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37800 /* res = copysign (xa2, operand1) */
37801 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
37803 emit_label (label
);
37804 LABEL_NUSES (label
) = 1;
37806 emit_move_insn (operand0
, res
);
37809 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc(OPERAND1) into OPERAND0 via a fix-trunc to DImode/SImode
   and conversion back; restores the sign only when signed zeros must be
   honored.  |x| >= 2^52 skips to the label unchanged.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37812 ix86_expand_trunc (rtx operand0
, rtx operand1
)
37814 /* C code for SSE variant we expand below.
37815 double xa = fabs (x), x2;
37816 if (!isless (xa, TWO52))
37818 x2 = (double)(long)x;
37819 if (HONOR_SIGNED_ZEROS (mode))
37820 return copysign (x2, x);
37823 enum machine_mode mode
= GET_MODE (operand0
);
37824 rtx xa
, xi
, TWO52
, label
, res
, mask
;
37826 TWO52
= ix86_gen_TWO52 (mode
);
37828 /* Temporary for holding the result, initialized to the input
37829 operand to ease control flow. */
37830 res
= gen_reg_rtx (mode
);
37831 emit_move_insn (res
, operand1
);
37833 /* xa = abs (operand1) */
37834 xa
= ix86_expand_sse_fabs (res
, &mask
);
37836 /* if (!isless (xa, TWO52)) goto label; */
37837 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37839 /* x = (double)(long)x */
37840 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37841 expand_fix (xi
, res
, 0);
37842 expand_float (res
, xi
, 0);
37844 if (HONOR_SIGNED_ZEROS (mode
))
37845 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37847 emit_label (label
);
37848 LABEL_NUSES (label
) = 1;
37850 emit_move_insn (operand0
, res
);
37853 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc(OPERAND1) into OPERAND0 without a DImode fix-trunc (for
   32-bit): round |x| with the 2^52 trick, subtract a masked 1 when the
   rounding went up, and copy the sign back.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37856 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
37858 enum machine_mode mode
= GET_MODE (operand0
);
37859 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
37861 /* C code for SSE variant we expand below.
37862 double xa = fabs (x), x2;
37863 if (!isless (xa, TWO52))
37865 xa2 = xa + TWO52 - TWO52;
37869 x2 = copysign (xa2, x);
37873 TWO52
= ix86_gen_TWO52 (mode
);
37875 /* Temporary for holding the result, initialized to the input
37876 operand to ease control flow. */
37877 res
= gen_reg_rtx (mode
);
37878 emit_move_insn (res
, operand1
);
37880 /* xa = abs (operand1) */
37881 xa
= ix86_expand_sse_fabs (res
, &smask
);
37883 /* if (!isless (xa, TWO52)) goto label; */
37884 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37886 /* res = xa + TWO52 - TWO52; */
37887 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37888 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
37889 emit_move_insn (res
, tmp
);
37892 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37894 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
37895 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
37896 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37897 gen_rtx_AND (mode
, mask
, one
)));
37898 tmp
= expand_simple_binop (mode
, MINUS
,
37899 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
37900 emit_move_insn (res
, tmp
);
37902 /* res = copysign (res, operand1) */
37903 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
37905 emit_label (label
);
37906 LABEL_NUSES (label
) = 1;
37908 emit_move_insn (operand0
, res
);
37911 /* Expand SSE sequence for computing round from OPERAND1 storing
/* Expand round(OPERAND1) into OPERAND0 (64-bit path): add
   nextafter(0.5, 0.0) to |x|, truncate through an integer register, and
   copy the sign back.  Uses nextafter so halfway cases round away from
   zero.
   NOTE(review): extraction-garbled text preserved verbatim.  */
37914 ix86_expand_round (rtx operand0
, rtx operand1
)
37916 /* C code for the stuff we're doing below:
37917 double xa = fabs (x);
37918 if (!isless (xa, TWO52))
37920 xa = (double)(long)(xa + nextafter (0.5, 0.0));
37921 return copysign (xa, x);
37923 enum machine_mode mode
= GET_MODE (operand0
);
37924 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
37925 const struct real_format
*fmt
;
37926 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37928 /* Temporary for holding the result, initialized to the input
37929 operand to ease control flow. */
37930 res
= gen_reg_rtx (mode
);
37931 emit_move_insn (res
, operand1
);
37933 TWO52
= ix86_gen_TWO52 (mode
);
37934 xa
= ix86_expand_sse_fabs (res
, &mask
);
37935 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37937 /* load nextafter (0.5, 0.0) */
37938 fmt
= REAL_MODE_FORMAT (mode
);
37939 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37940 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37942 /* xa = xa + 0.5 */
37943 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37944 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37946 /* xa = (double)(int64_t)xa */
37947 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37948 expand_fix (xi
, xa
, 0);
37949 expand_float (xa
, xi
, 0);
37951 /* res = copysign (xa, operand1) */
37952 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
37954 emit_label (label
);
37955 LABEL_NUSES (label
) = 1;
37957 emit_move_insn (operand0
, res
);
37960 /* Expand SSE sequence for computing round
37961 from OP1 storing into OP0 using sse4 round insn. */
37963 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
37965 enum machine_mode mode
= GET_MODE (op0
);
37966 rtx e1
, e2
, res
, half
;
37967 const struct real_format
*fmt
;
37968 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37969 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
37970 rtx (*gen_round
) (rtx
, rtx
, rtx
);
37975 gen_copysign
= gen_copysignsf3
;
37976 gen_round
= gen_sse4_1_roundsf2
;
37979 gen_copysign
= gen_copysigndf3
;
37980 gen_round
= gen_sse4_1_rounddf2
;
37983 gcc_unreachable ();
37986 /* round (a) = trunc (a + copysign (0.5, a)) */
37988 /* load nextafter (0.5, 0.0) */
37989 fmt
= REAL_MODE_FORMAT (mode
);
37990 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37991 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37992 half
= const_double_from_real_value (pred_half
, mode
);
37994 /* e1 = copysign (0.5, op1) */
37995 e1
= gen_reg_rtx (mode
);
37996 emit_insn (gen_copysign (e1
, half
, op1
));
37998 /* e2 = op1 + e1 */
37999 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38001 /* res = trunc (e2) */
38002 res
= gen_reg_rtx (mode
);
38003 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38005 emit_move_insn (op0
, res
);
38009 /* Table of valid machine attributes. */
38010 static const struct attribute_spec ix86_attribute_table
[] =
38012 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38013 affects_type_identity } */
38014 /* Stdcall attribute says callee is responsible for popping arguments
38015 if they are not variable. */
38016 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38018 /* Fastcall attribute says callee is responsible for popping arguments
38019 if they are not variable. */
38020 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38022 /* Thiscall attribute says callee is responsible for popping arguments
38023 if they are not variable. */
38024 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38026 /* Cdecl attribute says the callee is a normal C declaration */
38027 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38029 /* Regparm attribute specifies how many integer arguments are to be
38030 passed in registers. */
38031 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38033 /* Sseregparm attribute says we are using x86_64 calling conventions
38034 for FP arguments. */
38035 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38037 /* The transactional memory builtins are implicitly regparm or fastcall
38038 depending on the ABI. Override the generic do-nothing attribute that
38039 these builtins were declared with. */
38040 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38042 /* force_align_arg_pointer says this function realigns the stack at entry. */
38043 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38044 false, true, true, ix86_handle_cconv_attribute
, false },
38045 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38046 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38047 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38048 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38051 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38053 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38055 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38056 SUBTARGET_ATTRIBUTE_TABLE
,
38058 /* ms_abi and sysv_abi calling convention function attributes. */
38059 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38060 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38061 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38063 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38064 ix86_handle_callee_pop_aggregate_return
, true },
38066 { NULL
, 0, 0, false, false, false, NULL
, false }
38069 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38071 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38073 int misalign ATTRIBUTE_UNUSED
)
38077 switch (type_of_cost
)
38080 return ix86_cost
->scalar_stmt_cost
;
38083 return ix86_cost
->scalar_load_cost
;
38086 return ix86_cost
->scalar_store_cost
;
38089 return ix86_cost
->vec_stmt_cost
;
38092 return ix86_cost
->vec_align_load_cost
;
38095 return ix86_cost
->vec_store_cost
;
38097 case vec_to_scalar
:
38098 return ix86_cost
->vec_to_scalar_cost
;
38100 case scalar_to_vec
:
38101 return ix86_cost
->scalar_to_vec_cost
;
38103 case unaligned_load
:
38104 case unaligned_store
:
38105 return ix86_cost
->vec_unalign_load_cost
;
38107 case cond_branch_taken
:
38108 return ix86_cost
->cond_taken_branch_cost
;
38110 case cond_branch_not_taken
:
38111 return ix86_cost
->cond_not_taken_branch_cost
;
38114 case vec_promote_demote
:
38115 return ix86_cost
->vec_stmt_cost
;
38117 case vec_construct
:
38118 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38119 return elements
/ 2 + 1;
38122 gcc_unreachable ();
38126 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38127 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38128 insn every time. */
38130 static GTY(()) rtx vselect_insn
;
38132 /* Initialize vselect_insn. */
38135 init_vselect_insn (void)
38140 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38141 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38142 XVECEXP (x
, 0, i
) = const0_rtx
;
38143 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38145 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38147 vselect_insn
= emit_insn (x
);
38151 /* Construct (set target (vec_select op0 (parallel perm))) and
38152 return true if that's a valid instruction in the active ISA. */
38155 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38156 unsigned nelt
, bool testing_p
)
38159 rtx x
, save_vconcat
;
38162 if (vselect_insn
== NULL_RTX
)
38163 init_vselect_insn ();
38165 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38166 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38167 for (i
= 0; i
< nelt
; ++i
)
38168 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38169 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38170 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38171 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38172 SET_DEST (PATTERN (vselect_insn
)) = target
;
38173 icode
= recog_memoized (vselect_insn
);
38175 if (icode
>= 0 && !testing_p
)
38176 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38178 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38179 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38180 INSN_CODE (vselect_insn
) = -1;
38185 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38188 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38189 const unsigned char *perm
, unsigned nelt
,
38192 enum machine_mode v2mode
;
38196 if (vselect_insn
== NULL_RTX
)
38197 init_vselect_insn ();
38199 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38200 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38201 PUT_MODE (x
, v2mode
);
38204 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38205 XEXP (x
, 0) = const0_rtx
;
38206 XEXP (x
, 1) = const0_rtx
;
38210 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38211 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38214 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38216 enum machine_mode vmode
= d
->vmode
;
38217 unsigned i
, mask
, nelt
= d
->nelt
;
38218 rtx target
, op0
, op1
, x
;
38219 rtx rperm
[32], vperm
;
38221 if (d
->one_operand_p
)
38223 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38225 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38227 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
38232 /* This is a blend, not a permute. Elements must stay in their
38233 respective lanes. */
38234 for (i
= 0; i
< nelt
; ++i
)
38236 unsigned e
= d
->perm
[i
];
38237 if (!(e
== i
|| e
== i
+ nelt
))
38244 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38245 decision should be extracted elsewhere, so that we only try that
38246 sequence once all budget==3 options have been tried. */
38247 target
= d
->target
;
38260 for (i
= 0; i
< nelt
; ++i
)
38261 mask
|= (d
->perm
[i
] >= nelt
) << i
;
38265 for (i
= 0; i
< 2; ++i
)
38266 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
38271 for (i
= 0; i
< 4; ++i
)
38272 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38277 /* See if bytes move in pairs so we can use pblendw with
38278 an immediate argument, rather than pblendvb with a vector
38280 for (i
= 0; i
< 16; i
+= 2)
38281 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38284 for (i
= 0; i
< nelt
; ++i
)
38285 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
38288 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
38289 vperm
= force_reg (vmode
, vperm
);
38291 if (GET_MODE_SIZE (vmode
) == 16)
38292 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
38294 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
38298 for (i
= 0; i
< 8; ++i
)
38299 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38304 target
= gen_lowpart (vmode
, target
);
38305 op0
= gen_lowpart (vmode
, op0
);
38306 op1
= gen_lowpart (vmode
, op1
);
38310 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38311 for (i
= 0; i
< 32; i
+= 2)
38312 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38314 /* See if bytes move in quadruplets. If yes, vpblendd
38315 with immediate can be used. */
38316 for (i
= 0; i
< 32; i
+= 4)
38317 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
38321 /* See if bytes move the same in both lanes. If yes,
38322 vpblendw with immediate can be used. */
38323 for (i
= 0; i
< 16; i
+= 2)
38324 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
38327 /* Use vpblendw. */
38328 for (i
= 0; i
< 16; ++i
)
38329 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
38334 /* Use vpblendd. */
38335 for (i
= 0; i
< 8; ++i
)
38336 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
38341 /* See if words move in pairs. If yes, vpblendd can be used. */
38342 for (i
= 0; i
< 16; i
+= 2)
38343 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38347 /* See if words move the same in both lanes. If not,
38348 vpblendvb must be used. */
38349 for (i
= 0; i
< 8; i
++)
38350 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
38352 /* Use vpblendvb. */
38353 for (i
= 0; i
< 32; ++i
)
38354 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
38358 target
= gen_lowpart (vmode
, target
);
38359 op0
= gen_lowpart (vmode
, op0
);
38360 op1
= gen_lowpart (vmode
, op1
);
38361 goto finish_pblendvb
;
38364 /* Use vpblendw. */
38365 for (i
= 0; i
< 16; ++i
)
38366 mask
|= (d
->perm
[i
] >= 16) << i
;
38370 /* Use vpblendd. */
38371 for (i
= 0; i
< 8; ++i
)
38372 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38377 /* Use vpblendd. */
38378 for (i
= 0; i
< 4; ++i
)
38379 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38384 gcc_unreachable ();
38387 /* This matches five different patterns with the different modes. */
38388 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
38389 x
= gen_rtx_SET (VOIDmode
, target
, x
);
38395 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38396 in terms of the variable form of vpermilps.
38398 Note that we will have already failed the immediate input vpermilps,
38399 which requires that the high and low part shuffle be identical; the
38400 variable form doesn't require that. */
38403 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
38405 rtx rperm
[8], vperm
;
38408 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
38411 /* We can only permute within the 128-bit lane. */
38412 for (i
= 0; i
< 8; ++i
)
38414 unsigned e
= d
->perm
[i
];
38415 if (i
< 4 ? e
>= 4 : e
< 4)
38422 for (i
= 0; i
< 8; ++i
)
38424 unsigned e
= d
->perm
[i
];
38426 /* Within each 128-bit lane, the elements of op0 are numbered
38427 from 0 and the elements of op1 are numbered from 4. */
38433 rperm
[i
] = GEN_INT (e
);
38436 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
38437 vperm
= force_reg (V8SImode
, vperm
);
38438 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
38443 /* Return true if permutation D can be performed as VMODE permutation
38447 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
38449 unsigned int i
, j
, chunk
;
38451 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
38452 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
38453 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
38456 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
38459 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
38460 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
38461 if (d
->perm
[i
] & (chunk
- 1))
38464 for (j
= 1; j
< chunk
; ++j
)
38465 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
38471 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38472 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
38475 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
38477 unsigned i
, nelt
, eltsz
, mask
;
38478 unsigned char perm
[32];
38479 enum machine_mode vmode
= V16QImode
;
38480 rtx rperm
[32], vperm
, target
, op0
, op1
;
38484 if (!d
->one_operand_p
)
38486 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
38489 && valid_perm_using_mode_p (V2TImode
, d
))
38494 /* Use vperm2i128 insn. The pattern uses
38495 V4DImode instead of V2TImode. */
38496 target
= gen_lowpart (V4DImode
, d
->target
);
38497 op0
= gen_lowpart (V4DImode
, d
->op0
);
38498 op1
= gen_lowpart (V4DImode
, d
->op1
);
38500 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
38501 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
38502 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
38510 if (GET_MODE_SIZE (d
->vmode
) == 16)
38515 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38520 /* V4DImode should be already handled through
38521 expand_vselect by vpermq instruction. */
38522 gcc_assert (d
->vmode
!= V4DImode
);
38525 if (d
->vmode
== V8SImode
38526 || d
->vmode
== V16HImode
38527 || d
->vmode
== V32QImode
)
38529 /* First see if vpermq can be used for
38530 V8SImode/V16HImode/V32QImode. */
38531 if (valid_perm_using_mode_p (V4DImode
, d
))
38533 for (i
= 0; i
< 4; i
++)
38534 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
38537 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
38538 gen_lowpart (V4DImode
, d
->op0
),
38542 /* Next see if vpermd can be used. */
38543 if (valid_perm_using_mode_p (V8SImode
, d
))
38546 /* Or if vpermps can be used. */
38547 else if (d
->vmode
== V8SFmode
)
38550 if (vmode
== V32QImode
)
38552 /* vpshufb only works intra lanes, it is not
38553 possible to shuffle bytes in between the lanes. */
38554 for (i
= 0; i
< nelt
; ++i
)
38555 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
38566 if (vmode
== V8SImode
)
38567 for (i
= 0; i
< 8; ++i
)
38568 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
38571 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38572 if (!d
->one_operand_p
)
38573 mask
= 2 * nelt
- 1;
38574 else if (vmode
== V16QImode
)
38577 mask
= nelt
/ 2 - 1;
38579 for (i
= 0; i
< nelt
; ++i
)
38581 unsigned j
, e
= d
->perm
[i
] & mask
;
38582 for (j
= 0; j
< eltsz
; ++j
)
38583 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
38587 vperm
= gen_rtx_CONST_VECTOR (vmode
,
38588 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
38589 vperm
= force_reg (vmode
, vperm
);
38591 target
= gen_lowpart (vmode
, d
->target
);
38592 op0
= gen_lowpart (vmode
, d
->op0
);
38593 if (d
->one_operand_p
)
38595 if (vmode
== V16QImode
)
38596 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
38597 else if (vmode
== V32QImode
)
38598 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
38599 else if (vmode
== V8SFmode
)
38600 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
38602 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
38606 op1
= gen_lowpart (vmode
, d
->op1
);
38607 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
38613 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
38614 in a single instruction. */
38617 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
38619 unsigned i
, nelt
= d
->nelt
;
38620 unsigned char perm2
[MAX_VECT_LEN
];
38622 /* Check plain VEC_SELECT first, because AVX has instructions that could
38623 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
38624 input where SEL+CONCAT may not. */
38625 if (d
->one_operand_p
)
38627 int mask
= nelt
- 1;
38628 bool identity_perm
= true;
38629 bool broadcast_perm
= true;
38631 for (i
= 0; i
< nelt
; i
++)
38633 perm2
[i
] = d
->perm
[i
] & mask
;
38635 identity_perm
= false;
38637 broadcast_perm
= false;
38643 emit_move_insn (d
->target
, d
->op0
);
38646 else if (broadcast_perm
&& TARGET_AVX2
)
38648 /* Use vpbroadcast{b,w,d}. */
38649 rtx (*gen
) (rtx
, rtx
) = NULL
;
38653 gen
= gen_avx2_pbroadcastv32qi_1
;
38656 gen
= gen_avx2_pbroadcastv16hi_1
;
38659 gen
= gen_avx2_pbroadcastv8si_1
;
38662 gen
= gen_avx2_pbroadcastv16qi
;
38665 gen
= gen_avx2_pbroadcastv8hi
;
38668 gen
= gen_avx2_vec_dupv8sf_1
;
38670 /* For other modes prefer other shuffles this function creates. */
38676 emit_insn (gen (d
->target
, d
->op0
));
38681 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
38684 /* There are plenty of patterns in sse.md that are written for
38685 SEL+CONCAT and are not replicated for a single op. Perhaps
38686 that should be changed, to avoid the nastiness here. */
38688 /* Recognize interleave style patterns, which means incrementing
38689 every other permutation operand. */
38690 for (i
= 0; i
< nelt
; i
+= 2)
38692 perm2
[i
] = d
->perm
[i
] & mask
;
38693 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
38695 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38699 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
38702 for (i
= 0; i
< nelt
; i
+= 4)
38704 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
38705 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
38706 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
38707 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
38710 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38716 /* Finally, try the fully general two operand permute. */
38717 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
38721 /* Recognize interleave style patterns with reversed operands. */
38722 if (!d
->one_operand_p
)
38724 for (i
= 0; i
< nelt
; ++i
)
38726 unsigned e
= d
->perm
[i
];
38734 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
38739 /* Try the SSE4.1 blend variable merge instructions. */
38740 if (expand_vec_perm_blend (d
))
38743 /* Try one of the AVX vpermil variable permutations. */
38744 if (expand_vec_perm_vpermil (d
))
38747 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
38748 vpshufb, vpermd, vpermps or vpermq variable permutation. */
38749 if (expand_vec_perm_pshufb (d
))
38755 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38756 in terms of a pair of pshuflw + pshufhw instructions. */
38759 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
38761 unsigned char perm2
[MAX_VECT_LEN
];
38765 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
38768 /* The two permutations only operate in 64-bit lanes. */
38769 for (i
= 0; i
< 4; ++i
)
38770 if (d
->perm
[i
] >= 4)
38772 for (i
= 4; i
< 8; ++i
)
38773 if (d
->perm
[i
] < 4)
38779 /* Emit the pshuflw. */
38780 memcpy (perm2
, d
->perm
, 4);
38781 for (i
= 4; i
< 8; ++i
)
38783 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
38786 /* Emit the pshufhw. */
38787 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
38788 for (i
= 0; i
< 4; ++i
)
38790 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
38796 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38797 the permutation using the SSSE3 palignr instruction. This succeeds
38798 when all of the elements in PERM fit within one vector and we merely
38799 need to shift them down so that a single vector permutation has a
38800 chance to succeed. */
38803 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
38805 unsigned i
, nelt
= d
->nelt
;
38810 /* Even with AVX, palignr only operates on 128-bit vectors. */
38811 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38814 min
= nelt
, max
= 0;
38815 for (i
= 0; i
< nelt
; ++i
)
38817 unsigned e
= d
->perm
[i
];
38823 if (min
== 0 || max
- min
>= nelt
)
38826 /* Given that we have SSSE3, we know we'll be able to implement the
38827 single operand permutation after the palignr with pshufb. */
38831 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
38832 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
38833 gen_lowpart (TImode
, d
->op1
),
38834 gen_lowpart (TImode
, d
->op0
), shift
));
38836 d
->op0
= d
->op1
= d
->target
;
38837 d
->one_operand_p
= true;
38840 for (i
= 0; i
< nelt
; ++i
)
38842 unsigned e
= d
->perm
[i
] - min
;
38848 /* Test for the degenerate case where the alignment by itself
38849 produces the desired permutation. */
38853 ok
= expand_vec_perm_1 (d
);
38859 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
38861 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38862 a two vector permutation into a single vector permutation by using
38863 an interleave operation to merge the vectors. */
38866 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
38868 struct expand_vec_perm_d dremap
, dfinal
;
38869 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
38870 unsigned HOST_WIDE_INT contents
;
38871 unsigned char remap
[2 * MAX_VECT_LEN
];
38873 bool ok
, same_halves
= false;
38875 if (GET_MODE_SIZE (d
->vmode
) == 16)
38877 if (d
->one_operand_p
)
38880 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38884 /* For 32-byte modes allow even d->one_operand_p.
38885 The lack of cross-lane shuffling in some instructions
38886 might prevent a single insn shuffle. */
38888 dfinal
.testing_p
= true;
38889 /* If expand_vec_perm_interleave3 can expand this into
38890 a 3 insn sequence, give up and let it be expanded as
38891 3 insn sequence. While that is one insn longer,
38892 it doesn't need a memory operand and in the common
38893 case that both interleave low and high permutations
38894 with the same operands are adjacent needs 4 insns
38895 for both after CSE. */
38896 if (expand_vec_perm_interleave3 (&dfinal
))
38902 /* Examine from whence the elements come. */
38904 for (i
= 0; i
< nelt
; ++i
)
38905 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
38907 memset (remap
, 0xff, sizeof (remap
));
38910 if (GET_MODE_SIZE (d
->vmode
) == 16)
38912 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
38914 /* Split the two input vectors into 4 halves. */
38915 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
38920 /* If the elements from the low halves use interleave low, and similarly
38921 for interleave high. If the elements are from mis-matched halves, we
38922 can use shufps for V4SF/V4SI or do a DImode shuffle. */
38923 if ((contents
& (h1
| h3
)) == contents
)
38926 for (i
= 0; i
< nelt2
; ++i
)
38929 remap
[i
+ nelt
] = i
* 2 + 1;
38930 dremap
.perm
[i
* 2] = i
;
38931 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
38933 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
38934 dremap
.vmode
= V4SFmode
;
38936 else if ((contents
& (h2
| h4
)) == contents
)
38939 for (i
= 0; i
< nelt2
; ++i
)
38941 remap
[i
+ nelt2
] = i
* 2;
38942 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
38943 dremap
.perm
[i
* 2] = i
+ nelt2
;
38944 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
38946 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
38947 dremap
.vmode
= V4SFmode
;
38949 else if ((contents
& (h1
| h4
)) == contents
)
38952 for (i
= 0; i
< nelt2
; ++i
)
38955 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
38956 dremap
.perm
[i
] = i
;
38957 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
38962 dremap
.vmode
= V2DImode
;
38964 dremap
.perm
[0] = 0;
38965 dremap
.perm
[1] = 3;
38968 else if ((contents
& (h2
| h3
)) == contents
)
38971 for (i
= 0; i
< nelt2
; ++i
)
38973 remap
[i
+ nelt2
] = i
;
38974 remap
[i
+ nelt
] = i
+ nelt2
;
38975 dremap
.perm
[i
] = i
+ nelt2
;
38976 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
38981 dremap
.vmode
= V2DImode
;
38983 dremap
.perm
[0] = 1;
38984 dremap
.perm
[1] = 2;
38992 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
38993 unsigned HOST_WIDE_INT q
[8];
38994 unsigned int nonzero_halves
[4];
38996 /* Split the two input vectors into 8 quarters. */
38997 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
38998 for (i
= 1; i
< 8; ++i
)
38999 q
[i
] = q
[0] << (nelt4
* i
);
39000 for (i
= 0; i
< 4; ++i
)
39001 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39003 nonzero_halves
[nzcnt
] = i
;
39009 gcc_assert (d
->one_operand_p
);
39010 nonzero_halves
[1] = nonzero_halves
[0];
39011 same_halves
= true;
39013 else if (d
->one_operand_p
)
39015 gcc_assert (nonzero_halves
[0] == 0);
39016 gcc_assert (nonzero_halves
[1] == 1);
39021 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39023 /* Attempt to increase the likelihood that dfinal
39024 shuffle will be intra-lane. */
39025 char tmph
= nonzero_halves
[0];
39026 nonzero_halves
[0] = nonzero_halves
[1];
39027 nonzero_halves
[1] = tmph
;
39030 /* vperm2f128 or vperm2i128. */
39031 for (i
= 0; i
< nelt2
; ++i
)
39033 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39034 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39035 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39036 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39039 if (d
->vmode
!= V8SFmode
39040 && d
->vmode
!= V4DFmode
39041 && d
->vmode
!= V8SImode
)
39043 dremap
.vmode
= V8SImode
;
39045 for (i
= 0; i
< 4; ++i
)
39047 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39048 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39052 else if (d
->one_operand_p
)
39054 else if (TARGET_AVX2
39055 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39058 for (i
= 0; i
< nelt4
; ++i
)
39061 remap
[i
+ nelt
] = i
* 2 + 1;
39062 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39063 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39064 dremap
.perm
[i
* 2] = i
;
39065 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39066 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39067 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39070 else if (TARGET_AVX2
39071 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39074 for (i
= 0; i
< nelt4
; ++i
)
39076 remap
[i
+ nelt4
] = i
* 2;
39077 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39078 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39079 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39080 dremap
.perm
[i
* 2] = i
+ nelt4
;
39081 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39082 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39083 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39090 /* Use the remapping array set up above to move the elements from their
39091 swizzled locations into their final destinations. */
39093 for (i
= 0; i
< nelt
; ++i
)
39095 unsigned e
= remap
[d
->perm
[i
]];
39096 gcc_assert (e
< nelt
);
39097 /* If same_halves is true, both halves of the remapped vector are the
39098 same. Avoid cross-lane accesses if possible. */
39099 if (same_halves
&& i
>= nelt2
)
39101 gcc_assert (e
< nelt2
);
39102 dfinal
.perm
[i
] = e
+ nelt2
;
39105 dfinal
.perm
[i
] = e
;
39107 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39108 dfinal
.op1
= dfinal
.op0
;
39109 dfinal
.one_operand_p
= true;
39110 dremap
.target
= dfinal
.op0
;
39112 /* Test if the final remap can be done with a single insn. For V4SFmode or
39113 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39115 ok
= expand_vec_perm_1 (&dfinal
);
39116 seq
= get_insns ();
39125 if (dremap
.vmode
!= dfinal
.vmode
)
39127 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39128 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39129 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39132 ok
= expand_vec_perm_1 (&dremap
);
39139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39140 a single vector cross-lane permutation into vpermq followed
39141 by any of the single insn permutations. */
39144 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39146 struct expand_vec_perm_d dremap
, dfinal
;
39147 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39148 unsigned contents
[2];
39152 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39153 && d
->one_operand_p
))
39158 for (i
= 0; i
< nelt2
; ++i
)
39160 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39161 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39164 for (i
= 0; i
< 2; ++i
)
39166 unsigned int cnt
= 0;
39167 for (j
= 0; j
< 4; ++j
)
39168 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39176 dremap
.vmode
= V4DImode
;
39178 dremap
.target
= gen_reg_rtx (V4DImode
);
39179 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39180 dremap
.op1
= dremap
.op0
;
39181 dremap
.one_operand_p
= true;
39182 for (i
= 0; i
< 2; ++i
)
39184 unsigned int cnt
= 0;
39185 for (j
= 0; j
< 4; ++j
)
39186 if ((contents
[i
] & (1u << j
)) != 0)
39187 dremap
.perm
[2 * i
+ cnt
++] = j
;
39188 for (; cnt
< 2; ++cnt
)
39189 dremap
.perm
[2 * i
+ cnt
] = 0;
39193 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39194 dfinal
.op1
= dfinal
.op0
;
39195 dfinal
.one_operand_p
= true;
39196 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39200 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39201 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39203 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39204 dfinal
.perm
[i
] |= nelt4
;
39206 gcc_unreachable ();
39209 ok
= expand_vec_perm_1 (&dremap
);
39212 ok
= expand_vec_perm_1 (&dfinal
);
39218 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39219 a vector permutation using two instructions, vperm2f128 resp.
39220 vperm2i128 followed by any single in-lane permutation. */
39223 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39225 struct expand_vec_perm_d dfirst
, dsecond
;
39226 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39230 || GET_MODE_SIZE (d
->vmode
) != 32
39231 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
39235 dsecond
.one_operand_p
= false;
39236 dsecond
.testing_p
= true;
39238 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39239 immediate. For perm < 16 the second permutation uses
39240 d->op0 as first operand, for perm >= 16 it uses d->op1
39241 as first operand. The second operand is the result of
39243 for (perm
= 0; perm
< 32; perm
++)
39245 /* Ignore permutations which do not move anything cross-lane. */
39248 /* The second shuffle for e.g. V4DFmode has
39249 0123 and ABCD operands.
39250 Ignore AB23, as 23 is already in the second lane
39251 of the first operand. */
39252 if ((perm
& 0xc) == (1 << 2)) continue;
39253 /* And 01CD, as 01 is in the first lane of the first
39255 if ((perm
& 3) == 0) continue;
39256 /* And 4567, as then the vperm2[fi]128 doesn't change
39257 anything on the original 4567 second operand. */
39258 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
39262 /* The second shuffle for e.g. V4DFmode has
39263 4567 and ABCD operands.
39264 Ignore AB67, as 67 is already in the second lane
39265 of the first operand. */
39266 if ((perm
& 0xc) == (3 << 2)) continue;
39267 /* And 45CD, as 45 is in the first lane of the first
39269 if ((perm
& 3) == 2) continue;
39270 /* And 0123, as then the vperm2[fi]128 doesn't change
39271 anything on the original 0123 first operand. */
39272 if ((perm
& 0xf) == (1 << 2)) continue;
39275 for (i
= 0; i
< nelt
; i
++)
39277 j
= d
->perm
[i
] / nelt2
;
39278 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
39279 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
39280 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
39281 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
39289 ok
= expand_vec_perm_1 (&dsecond
);
39300 /* Found a usable second shuffle. dfirst will be
39301 vperm2f128 on d->op0 and d->op1. */
39302 dsecond
.testing_p
= false;
39304 dfirst
.target
= gen_reg_rtx (d
->vmode
);
39305 for (i
= 0; i
< nelt
; i
++)
39306 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
39307 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
39309 ok
= expand_vec_perm_1 (&dfirst
);
39312 /* And dsecond is some single insn shuffle, taking
39313 d->op0 and result of vperm2f128 (if perm < 16) or
39314 d->op1 and result of vperm2f128 (otherwise). */
39315 dsecond
.op1
= dfirst
.target
;
39317 dsecond
.op0
= dfirst
.op1
;
39319 ok
= expand_vec_perm_1 (&dsecond
);
39325 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39326 if (d
->one_operand_p
)
39333 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39334 a two vector permutation using 2 intra-lane interleave insns
39335 and cross-lane shuffle for 32-byte vectors. */
39338 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
39341 rtx (*gen
) (rtx
, rtx
, rtx
);
39343 if (d
->one_operand_p
)
39345 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
39347 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
39353 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
39355 for (i
= 0; i
< nelt
; i
+= 2)
39356 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
39357 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
39367 gen
= gen_vec_interleave_highv32qi
;
39369 gen
= gen_vec_interleave_lowv32qi
;
39373 gen
= gen_vec_interleave_highv16hi
;
39375 gen
= gen_vec_interleave_lowv16hi
;
39379 gen
= gen_vec_interleave_highv8si
;
39381 gen
= gen_vec_interleave_lowv8si
;
39385 gen
= gen_vec_interleave_highv4di
;
39387 gen
= gen_vec_interleave_lowv4di
;
39391 gen
= gen_vec_interleave_highv8sf
;
39393 gen
= gen_vec_interleave_lowv8sf
;
39397 gen
= gen_vec_interleave_highv4df
;
39399 gen
= gen_vec_interleave_lowv4df
;
39402 gcc_unreachable ();
39405 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
39409 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39410 a single vector permutation using a single intra-lane vector
39411 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39412 the non-swapped and swapped vectors together. */
39415 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
39417 struct expand_vec_perm_d dfirst
, dsecond
;
39418 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39421 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
39425 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
39426 || !d
->one_operand_p
)
39430 for (i
= 0; i
< nelt
; i
++)
39431 dfirst
.perm
[i
] = 0xff;
39432 for (i
= 0, msk
= 0; i
< nelt
; i
++)
39434 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
39435 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
39437 dfirst
.perm
[j
] = d
->perm
[i
];
39441 for (i
= 0; i
< nelt
; i
++)
39442 if (dfirst
.perm
[i
] == 0xff)
39443 dfirst
.perm
[i
] = i
;
39446 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39449 ok
= expand_vec_perm_1 (&dfirst
);
39450 seq
= get_insns ();
39462 dsecond
.op0
= dfirst
.target
;
39463 dsecond
.op1
= dfirst
.target
;
39464 dsecond
.one_operand_p
= true;
39465 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39466 for (i
= 0; i
< nelt
; i
++)
39467 dsecond
.perm
[i
] = i
^ nelt2
;
39469 ok
= expand_vec_perm_1 (&dsecond
);
39472 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
39473 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
39477 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39478 permutation using two vperm2f128, followed by a vshufpd insn blending
39479 the two vectors together. */
39482 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
39484 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
39487 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
39497 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
39498 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
39499 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
39500 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
39501 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
39502 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
39503 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
39504 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
39505 dthird
.perm
[0] = (d
->perm
[0] % 2);
39506 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
39507 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
39508 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
39510 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39511 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39512 dthird
.op0
= dfirst
.target
;
39513 dthird
.op1
= dsecond
.target
;
39514 dthird
.one_operand_p
= false;
39516 canonicalize_perm (&dfirst
);
39517 canonicalize_perm (&dsecond
);
39519 ok
= expand_vec_perm_1 (&dfirst
)
39520 && expand_vec_perm_1 (&dsecond
)
39521 && expand_vec_perm_1 (&dthird
);
39528 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39529 permutation with two pshufb insns and an ior. We should have already
39530 failed all two instruction sequences. */
39533 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
39535 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
39536 unsigned int i
, nelt
, eltsz
;
39538 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39540 gcc_assert (!d
->one_operand_p
);
39543 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39545 /* Generate two permutation masks. If the required element is within
39546 the given vector it is shuffled into the proper lane. If the required
39547 element is in the other vector, force a zero into the lane by setting
39548 bit 7 in the permutation mask. */
39549 m128
= GEN_INT (-128);
39550 for (i
= 0; i
< nelt
; ++i
)
39552 unsigned j
, e
= d
->perm
[i
];
39553 unsigned which
= (e
>= nelt
);
39557 for (j
= 0; j
< eltsz
; ++j
)
39559 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
39560 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
39564 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
39565 vperm
= force_reg (V16QImode
, vperm
);
39567 l
= gen_reg_rtx (V16QImode
);
39568 op
= gen_lowpart (V16QImode
, d
->op0
);
39569 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
39571 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
39572 vperm
= force_reg (V16QImode
, vperm
);
39574 h
= gen_reg_rtx (V16QImode
);
39575 op
= gen_lowpart (V16QImode
, d
->op1
);
39576 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
39578 op
= gen_lowpart (V16QImode
, d
->target
);
39579 emit_insn (gen_iorv16qi3 (op
, l
, h
));
39584 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
39585 with two vpshufb insns, vpermq and vpor. We should have already failed
39586 all two or three instruction sequences. */
39589 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
39591 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
39592 unsigned int i
, nelt
, eltsz
;
39595 || !d
->one_operand_p
39596 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39603 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39605 /* Generate two permutation masks. If the required element is within
39606 the same lane, it is shuffled in. If the required element from the
39607 other lane, force a zero by setting bit 7 in the permutation mask.
39608 In the other mask the mask has non-negative elements if element
39609 is requested from the other lane, but also moved to the other lane,
39610 so that the result of vpshufb can have the two V2TImode halves
39612 m128
= GEN_INT (-128);
39613 for (i
= 0; i
< nelt
; ++i
)
39615 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39616 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39618 for (j
= 0; j
< eltsz
; ++j
)
39620 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
39621 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
39625 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39626 vperm
= force_reg (V32QImode
, vperm
);
39628 h
= gen_reg_rtx (V32QImode
);
39629 op
= gen_lowpart (V32QImode
, d
->op0
);
39630 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39632 /* Swap the 128-byte lanes of h into hp. */
39633 hp
= gen_reg_rtx (V4DImode
);
39634 op
= gen_lowpart (V4DImode
, h
);
39635 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
39638 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39639 vperm
= force_reg (V32QImode
, vperm
);
39641 l
= gen_reg_rtx (V32QImode
);
39642 op
= gen_lowpart (V32QImode
, d
->op0
);
39643 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39645 op
= gen_lowpart (V32QImode
, d
->target
);
39646 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
39651 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
39652 and extract-odd permutations of two V32QImode and V16QImode operand
39653 with two vpshufb insns, vpor and vpermq. We should have already
39654 failed all two or three instruction sequences. */
39657 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
39659 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
39660 unsigned int i
, nelt
, eltsz
;
39663 || d
->one_operand_p
39664 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39667 for (i
= 0; i
< d
->nelt
; ++i
)
39668 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
39675 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39677 /* Generate two permutation masks. In the first permutation mask
39678 the first quarter will contain indexes for the first half
39679 of the op0, the second quarter will contain bit 7 set, third quarter
39680 will contain indexes for the second half of the op0 and the
39681 last quarter bit 7 set. In the second permutation mask
39682 the first quarter will contain bit 7 set, the second quarter
39683 indexes for the first half of the op1, the third quarter bit 7 set
39684 and last quarter indexes for the second half of the op1.
39685 I.e. the first mask e.g. for V32QImode extract even will be:
39686 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
39687 (all values masked with 0xf except for -128) and second mask
39688 for extract even will be
39689 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
39690 m128
= GEN_INT (-128);
39691 for (i
= 0; i
< nelt
; ++i
)
39693 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39694 unsigned which
= d
->perm
[i
] >= nelt
;
39695 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
39697 for (j
= 0; j
< eltsz
; ++j
)
39699 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
39700 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
39704 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39705 vperm
= force_reg (V32QImode
, vperm
);
39707 l
= gen_reg_rtx (V32QImode
);
39708 op
= gen_lowpart (V32QImode
, d
->op0
);
39709 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39711 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39712 vperm
= force_reg (V32QImode
, vperm
);
39714 h
= gen_reg_rtx (V32QImode
);
39715 op
= gen_lowpart (V32QImode
, d
->op1
);
39716 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39718 ior
= gen_reg_rtx (V32QImode
);
39719 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
39721 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
39722 op
= gen_lowpart (V4DImode
, d
->target
);
39723 ior
= gen_lowpart (V4DImode
, ior
);
39724 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
39725 const1_rtx
, GEN_INT (3)));
39730 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
39731 and extract-odd permutations. */
39734 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
39741 t1
= gen_reg_rtx (V4DFmode
);
39742 t2
= gen_reg_rtx (V4DFmode
);
39744 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39745 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39746 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39748 /* Now an unpck[lh]pd will produce the result required. */
39750 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
39752 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
39758 int mask
= odd
? 0xdd : 0x88;
39760 t1
= gen_reg_rtx (V8SFmode
);
39761 t2
= gen_reg_rtx (V8SFmode
);
39762 t3
= gen_reg_rtx (V8SFmode
);
39764 /* Shuffle within the 128-bit lanes to produce:
39765 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
39766 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
39769 /* Shuffle the lanes around to produce:
39770 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
39771 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
39774 /* Shuffle within the 128-bit lanes to produce:
39775 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
39776 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
39778 /* Shuffle within the 128-bit lanes to produce:
39779 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
39780 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
39782 /* Shuffle the lanes around to produce:
39783 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
39784 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
39793 /* These are always directly implementable by expand_vec_perm_1. */
39794 gcc_unreachable ();
39798 return expand_vec_perm_pshufb2 (d
);
39801 /* We need 2*log2(N)-1 operations to achieve odd/even
39802 with interleave. */
39803 t1
= gen_reg_rtx (V8HImode
);
39804 t2
= gen_reg_rtx (V8HImode
);
39805 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
39806 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
39807 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
39808 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
39810 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
39812 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
39819 return expand_vec_perm_pshufb2 (d
);
39822 t1
= gen_reg_rtx (V16QImode
);
39823 t2
= gen_reg_rtx (V16QImode
);
39824 t3
= gen_reg_rtx (V16QImode
);
39825 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
39826 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
39827 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
39828 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
39829 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
39830 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
39832 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
39834 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
39841 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
39846 struct expand_vec_perm_d d_copy
= *d
;
39847 d_copy
.vmode
= V4DFmode
;
39848 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
39849 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
39850 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
39851 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39854 t1
= gen_reg_rtx (V4DImode
);
39855 t2
= gen_reg_rtx (V4DImode
);
39857 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39858 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39859 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39861 /* Now an vpunpck[lh]qdq will produce the result required. */
39863 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
39865 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
39872 struct expand_vec_perm_d d_copy
= *d
;
39873 d_copy
.vmode
= V8SFmode
;
39874 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
39875 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
39876 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
39877 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39880 t1
= gen_reg_rtx (V8SImode
);
39881 t2
= gen_reg_rtx (V8SImode
);
39883 /* Shuffle the lanes around into
39884 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
39885 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
39886 gen_lowpart (V4DImode
, d
->op0
),
39887 gen_lowpart (V4DImode
, d
->op1
),
39889 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
39890 gen_lowpart (V4DImode
, d
->op0
),
39891 gen_lowpart (V4DImode
, d
->op1
),
39894 /* Swap the 2nd and 3rd position in each lane into
39895 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
39896 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
39897 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39898 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
39899 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39901 /* Now an vpunpck[lh]qdq will produce
39902 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
39904 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
39905 gen_lowpart (V4DImode
, t1
),
39906 gen_lowpart (V4DImode
, t2
));
39908 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
39909 gen_lowpart (V4DImode
, t1
),
39910 gen_lowpart (V4DImode
, t2
));
39915 gcc_unreachable ();
39921 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
39922 extract-even and extract-odd permutations. */
39925 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
39927 unsigned i
, odd
, nelt
= d
->nelt
;
39930 if (odd
!= 0 && odd
!= 1)
39933 for (i
= 1; i
< nelt
; ++i
)
39934 if (d
->perm
[i
] != 2 * i
+ odd
)
39937 return expand_vec_perm_even_odd_1 (d
, odd
);
39940 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
39941 permutations. We assume that expand_vec_perm_1 has already failed. */
39944 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
39946 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
39947 enum machine_mode vmode
= d
->vmode
;
39948 unsigned char perm2
[4];
39956 /* These are special-cased in sse.md so that we can optionally
39957 use the vbroadcast instruction. They expand to two insns
39958 if the input happens to be in a register. */
39959 gcc_unreachable ();
39965 /* These are always implementable using standard shuffle patterns. */
39966 gcc_unreachable ();
39970 /* These can be implemented via interleave. We save one insn by
39971 stopping once we have promoted to V4SImode and then use pshufd. */
39975 rtx (*gen
) (rtx
, rtx
, rtx
)
39976 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
39977 : gen_vec_interleave_lowv8hi
;
39981 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
39982 : gen_vec_interleave_highv8hi
;
39987 dest
= gen_reg_rtx (vmode
);
39988 emit_insn (gen (dest
, op0
, op0
));
39989 vmode
= get_mode_wider_vector (vmode
);
39990 op0
= gen_lowpart (vmode
, dest
);
39992 while (vmode
!= V4SImode
);
39994 memset (perm2
, elt
, 4);
39995 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40004 /* For AVX2 broadcasts of the first element vpbroadcast* or
40005 vpermq should be used by expand_vec_perm_1. */
40006 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40010 gcc_unreachable ();
40014 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40015 broadcast permutations. */
40018 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40020 unsigned i
, elt
, nelt
= d
->nelt
;
40022 if (!d
->one_operand_p
)
40026 for (i
= 1; i
< nelt
; ++i
)
40027 if (d
->perm
[i
] != elt
)
40030 return expand_vec_perm_broadcast_1 (d
);
40033 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40034 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40035 all the shorter instruction sequences. */
40038 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40040 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40041 unsigned int i
, nelt
, eltsz
;
40045 || d
->one_operand_p
40046 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40053 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40055 /* Generate 4 permutation masks. If the required element is within
40056 the same lane, it is shuffled in. If the required element from the
40057 other lane, force a zero by setting bit 7 in the permutation mask.
40058 In the other mask the mask has non-negative elements if element
40059 is requested from the other lane, but also moved to the other lane,
40060 so that the result of vpshufb can have the two V2TImode halves
40062 m128
= GEN_INT (-128);
40063 for (i
= 0; i
< 32; ++i
)
40065 rperm
[0][i
] = m128
;
40066 rperm
[1][i
] = m128
;
40067 rperm
[2][i
] = m128
;
40068 rperm
[3][i
] = m128
;
40074 for (i
= 0; i
< nelt
; ++i
)
40076 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40077 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40078 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40080 for (j
= 0; j
< eltsz
; ++j
)
40081 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40082 used
[which
] = true;
40085 for (i
= 0; i
< 2; ++i
)
40087 if (!used
[2 * i
+ 1])
40092 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40093 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40094 vperm
= force_reg (V32QImode
, vperm
);
40095 h
[i
] = gen_reg_rtx (V32QImode
);
40096 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40097 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40100 /* Swap the 128-byte lanes of h[X]. */
40101 for (i
= 0; i
< 2; ++i
)
40103 if (h
[i
] == NULL_RTX
)
40105 op
= gen_reg_rtx (V4DImode
);
40106 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40107 const2_rtx
, GEN_INT (3), const0_rtx
,
40109 h
[i
] = gen_lowpart (V32QImode
, op
);
40112 for (i
= 0; i
< 2; ++i
)
40119 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40120 vperm
= force_reg (V32QImode
, vperm
);
40121 l
[i
] = gen_reg_rtx (V32QImode
);
40122 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40123 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40126 for (i
= 0; i
< 2; ++i
)
40130 op
= gen_reg_rtx (V32QImode
);
40131 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40138 gcc_assert (l
[0] && l
[1]);
40139 op
= gen_lowpart (V32QImode
, d
->target
);
40140 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40144 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40145 With all of the interface bits taken care of, perform the expansion
40146 in D and return true on success. */
40149 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40151 /* Try a single instruction expansion. */
40152 if (expand_vec_perm_1 (d
))
40155 /* Try sequences of two instructions. */
40157 if (expand_vec_perm_pshuflw_pshufhw (d
))
40160 if (expand_vec_perm_palignr (d
))
40163 if (expand_vec_perm_interleave2 (d
))
40166 if (expand_vec_perm_broadcast (d
))
40169 if (expand_vec_perm_vpermq_perm_1 (d
))
40172 if (expand_vec_perm_vperm2f128 (d
))
40175 /* Try sequences of three instructions. */
40177 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40180 if (expand_vec_perm_pshufb2 (d
))
40183 if (expand_vec_perm_interleave3 (d
))
40186 if (expand_vec_perm_vperm2f128_vblend (d
))
40189 /* Try sequences of four instructions. */
40191 if (expand_vec_perm_vpshufb2_vpermq (d
))
40194 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40197 /* ??? Look for narrow permutations whose element orderings would
40198 allow the promotion to a wider mode. */
40200 /* ??? Look for sequences of interleave or a wider permute that place
40201 the data into the correct lanes for a half-vector shuffle like
40202 pshuf[lh]w or vpermilps. */
40204 /* ??? Look for sequences of interleave that produce the desired results.
40205 The combinatorics of punpck[lh] get pretty ugly... */
40207 if (expand_vec_perm_even_odd (d
))
40210 /* Even longer sequences. */
40211 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40217 /* If a permutation only uses one operand, make it clear. Returns true
40218 if the permutation references both operands. */
40221 canonicalize_perm (struct expand_vec_perm_d
*d
)
40223 int i
, which
, nelt
= d
->nelt
;
40225 for (i
= which
= 0; i
< nelt
; ++i
)
40226 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40228 d
->one_operand_p
= true;
40235 if (!rtx_equal_p (d
->op0
, d
->op1
))
40237 d
->one_operand_p
= false;
40240 /* The elements of PERM do not suggest that only the first operand
40241 is used, but both operands are identical. Allow easier matching
40242 of the permutation by folding the permutation into the single
40247 for (i
= 0; i
< nelt
; ++i
)
40248 d
->perm
[i
] &= nelt
- 1;
40257 return (which
== 3);
40261 ix86_expand_vec_perm_const (rtx operands
[4])
40263 struct expand_vec_perm_d d
;
40264 unsigned char perm
[MAX_VECT_LEN
];
40269 d
.target
= operands
[0];
40270 d
.op0
= operands
[1];
40271 d
.op1
= operands
[2];
40274 d
.vmode
= GET_MODE (d
.target
);
40275 gcc_assert (VECTOR_MODE_P (d
.vmode
));
40276 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40277 d
.testing_p
= false;
40279 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
40280 gcc_assert (XVECLEN (sel
, 0) == nelt
);
40281 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
40283 for (i
= 0; i
< nelt
; ++i
)
40285 rtx e
= XVECEXP (sel
, 0, i
);
40286 int ei
= INTVAL (e
) & (2 * nelt
- 1);
40291 two_args
= canonicalize_perm (&d
);
40293 if (ix86_expand_vec_perm_const_1 (&d
))
40296 /* If the selector says both arguments are needed, but the operands are the
40297 same, the above tried to expand with one_operand_p and flattened selector.
40298 If that didn't work, retry without one_operand_p; we succeeded with that
40300 if (two_args
&& d
.one_operand_p
)
40302 d
.one_operand_p
= false;
40303 memcpy (d
.perm
, perm
, sizeof (perm
));
40304 return ix86_expand_vec_perm_const_1 (&d
);
40310 /* Implement targetm.vectorize.vec_perm_const_ok. */
40313 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
40314 const unsigned char *sel
)
40316 struct expand_vec_perm_d d
;
40317 unsigned int i
, nelt
, which
;
40321 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40322 d
.testing_p
= true;
40324 /* Given sufficient ISA support we can just return true here
40325 for selected vector modes. */
40326 if (GET_MODE_SIZE (d
.vmode
) == 16)
40328 /* All implementable with a single vpperm insn. */
40331 /* All implementable with 2 pshufb + 1 ior. */
40334 /* All implementable with shufpd or unpck[lh]pd. */
40339 /* Extract the values from the vector CST into the permutation
40341 memcpy (d
.perm
, sel
, nelt
);
40342 for (i
= which
= 0; i
< nelt
; ++i
)
40344 unsigned char e
= d
.perm
[i
];
40345 gcc_assert (e
< 2 * nelt
);
40346 which
|= (e
< nelt
? 1 : 2);
40349 /* For all elements from second vector, fold the elements to first. */
40351 for (i
= 0; i
< nelt
; ++i
)
40354 /* Check whether the mask can be applied to the vector type. */
40355 d
.one_operand_p
= (which
!= 3);
40357 /* Implementable with shufps or pshufd. */
40358 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
40361 /* Otherwise we have to go through the motions and see if we can
40362 figure out how to generate the requested permutation. */
40363 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
40364 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
40365 if (!d
.one_operand_p
)
40366 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
40369 ret
= ix86_expand_vec_perm_const_1 (&d
);
40376 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
40378 struct expand_vec_perm_d d
;
40384 d
.vmode
= GET_MODE (targ
);
40385 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40386 d
.one_operand_p
= false;
40387 d
.testing_p
= false;
40389 for (i
= 0; i
< nelt
; ++i
)
40390 d
.perm
[i
] = i
* 2 + odd
;
40392 /* We'll either be able to implement the permutation directly... */
40393 if (expand_vec_perm_1 (&d
))
40396 /* ... or we use the special-case patterns. */
40397 expand_vec_perm_even_odd_1 (&d
, odd
);
40401 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
40403 struct expand_vec_perm_d d
;
40404 unsigned i
, nelt
, base
;
40410 d
.vmode
= GET_MODE (targ
);
40411 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40412 d
.one_operand_p
= false;
40413 d
.testing_p
= false;
40415 base
= high_p
? nelt
/ 2 : 0;
40416 for (i
= 0; i
< nelt
/ 2; ++i
)
40418 d
.perm
[i
* 2] = i
+ base
;
40419 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
40422 /* Note that for AVX this isn't one instruction. */
40423 ok
= ix86_expand_vec_perm_const_1 (&d
);
40428 /* Expand a vector operation CODE for a V*QImode in terms of the
40429 same operation on V*HImode. */
40432 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
40434 enum machine_mode qimode
= GET_MODE (dest
);
40435 enum machine_mode himode
;
40436 rtx (*gen_il
) (rtx
, rtx
, rtx
);
40437 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
40438 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
40439 struct expand_vec_perm_d d
;
40440 bool ok
, full_interleave
;
40441 bool uns_p
= false;
40448 gen_il
= gen_vec_interleave_lowv16qi
;
40449 gen_ih
= gen_vec_interleave_highv16qi
;
40452 himode
= V16HImode
;
40453 gen_il
= gen_avx2_interleave_lowv32qi
;
40454 gen_ih
= gen_avx2_interleave_highv32qi
;
40457 gcc_unreachable ();
40460 op2_l
= op2_h
= op2
;
40464 /* Unpack data such that we've got a source byte in each low byte of
40465 each word. We don't care what goes into the high byte of each word.
40466 Rather than trying to get zero in there, most convenient is to let
40467 it be a copy of the low byte. */
40468 op2_l
= gen_reg_rtx (qimode
);
40469 op2_h
= gen_reg_rtx (qimode
);
40470 emit_insn (gen_il (op2_l
, op2
, op2
));
40471 emit_insn (gen_ih (op2_h
, op2
, op2
));
40474 op1_l
= gen_reg_rtx (qimode
);
40475 op1_h
= gen_reg_rtx (qimode
);
40476 emit_insn (gen_il (op1_l
, op1
, op1
));
40477 emit_insn (gen_ih (op1_h
, op1
, op1
));
40478 full_interleave
= qimode
== V16QImode
;
40486 op1_l
= gen_reg_rtx (himode
);
40487 op1_h
= gen_reg_rtx (himode
);
40488 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
40489 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
40490 full_interleave
= true;
40493 gcc_unreachable ();
40496 /* Perform the operation. */
40497 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
40499 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
40501 gcc_assert (res_l
&& res_h
);
40503 /* Merge the data back into the right place. */
40505 d
.op0
= gen_lowpart (qimode
, res_l
);
40506 d
.op1
= gen_lowpart (qimode
, res_h
);
40508 d
.nelt
= GET_MODE_NUNITS (qimode
);
40509 d
.one_operand_p
= false;
40510 d
.testing_p
= false;
40512 if (full_interleave
)
40514 /* For SSE2, we used an full interleave, so the desired
40515 results are in the even elements. */
40516 for (i
= 0; i
< 32; ++i
)
40521 /* For AVX, the interleave used above was not cross-lane. So the
40522 extraction is evens but with the second and third quarter swapped.
40523 Happily, that is even one insn shorter than even extraction. */
40524 for (i
= 0; i
< 32; ++i
)
40525 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
40528 ok
= ix86_expand_vec_perm_const_1 (&d
);
40531 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40532 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
40536 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
40537 bool uns_p
, bool odd_p
)
40539 enum machine_mode mode
= GET_MODE (op1
);
40540 enum machine_mode wmode
= GET_MODE (dest
);
40543 /* We only play even/odd games with vectors of SImode. */
40544 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
40546 /* If we're looking for the odd results, shift those members down to
40547 the even slots. For some cpus this is faster than a PSHUFD. */
40550 if (TARGET_XOP
&& mode
== V4SImode
)
40552 x
= force_reg (wmode
, CONST0_RTX (wmode
));
40553 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
40557 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
40558 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
40559 x
, NULL
, 1, OPTAB_DIRECT
);
40560 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
40561 x
, NULL
, 1, OPTAB_DIRECT
);
40562 op1
= gen_lowpart (mode
, op1
);
40563 op2
= gen_lowpart (mode
, op2
);
40566 if (mode
== V8SImode
)
40569 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
40571 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
40574 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
40575 else if (TARGET_SSE4_1
)
40576 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
40579 rtx s1
, s2
, t0
, t1
, t2
;
40581 /* The easiest way to implement this without PMULDQ is to go through
40582 the motions as if we are performing a full 64-bit multiply. With
40583 the exception that we need to do less shuffling of the elements. */
40585 /* Compute the sign-extension, aka highparts, of the two operands. */
40586 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40587 op1
, pc_rtx
, pc_rtx
);
40588 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40589 op2
, pc_rtx
, pc_rtx
);
40591 /* Multiply LO(A) * HI(B), and vice-versa. */
40592 t1
= gen_reg_rtx (wmode
);
40593 t2
= gen_reg_rtx (wmode
);
40594 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
40595 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
40597 /* Multiply LO(A) * LO(B). */
40598 t0
= gen_reg_rtx (wmode
);
40599 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
40601 /* Combine and shift the highparts into place. */
40602 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
40603 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
40606 /* Combine high and low parts. */
40607 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
40614 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
40615 bool uns_p
, bool high_p
)
40617 enum machine_mode wmode
= GET_MODE (dest
);
40618 enum machine_mode mode
= GET_MODE (op1
);
40619 rtx t1
, t2
, t3
, t4
, mask
;
40624 t1
= gen_reg_rtx (mode
);
40625 t2
= gen_reg_rtx (mode
);
40626 if (TARGET_XOP
&& !uns_p
)
40628 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
40629 shuffle the elements once so that all elements are in the right
40630 place for immediate use: { A C B D }. */
40631 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
40632 const1_rtx
, GEN_INT (3)));
40633 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
40634 const1_rtx
, GEN_INT (3)));
40638 /* Put the elements into place for the multiply. */
40639 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
40640 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
40643 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
40647 /* Shuffle the elements between the lanes. After this we
40648 have { A B E F | C D G H } for each operand. */
40649 t1
= gen_reg_rtx (V4DImode
);
40650 t2
= gen_reg_rtx (V4DImode
);
40651 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
40652 const0_rtx
, const2_rtx
,
40653 const1_rtx
, GEN_INT (3)));
40654 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
40655 const0_rtx
, const2_rtx
,
40656 const1_rtx
, GEN_INT (3)));
40658 /* Shuffle the elements within the lanes. After this we
40659 have { A A B B | C C D D } or { E E F F | G G H H }. */
40660 t3
= gen_reg_rtx (V8SImode
);
40661 t4
= gen_reg_rtx (V8SImode
);
40662 mask
= GEN_INT (high_p
40663 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
40664 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
40665 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
40666 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
40668 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
40673 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
40674 uns_p
, OPTAB_DIRECT
);
40675 t2
= expand_binop (mode
,
40676 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
40677 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
40678 gcc_assert (t1
&& t2
);
40680 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
40685 t1
= gen_reg_rtx (wmode
);
40686 t2
= gen_reg_rtx (wmode
);
40687 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
40688 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
40690 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
40694 gcc_unreachable ();
40699 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
40703 res_1
= gen_reg_rtx (V4SImode
);
40704 res_2
= gen_reg_rtx (V4SImode
);
40705 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
40706 op1
, op2
, true, false);
40707 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
40708 op1
, op2
, true, true);
40710 /* Move the results in element 2 down to element 1; we don't care
40711 what goes in elements 2 and 3. Then we can merge the parts
40712 back together with an interleave.
40714 Note that two other sequences were tried:
40715 (1) Use interleaves at the start instead of psrldq, which allows
40716 us to use a single shufps to merge things back at the end.
40717 (2) Use shufps here to combine the two vectors, then pshufd to
40718 put the elements in the correct order.
40719 In both cases the cost of the reformatting stall was too high
40720 and the overall sequence slower. */
40722 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
40723 const0_rtx
, const0_rtx
));
40724 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
40725 const0_rtx
, const0_rtx
));
40726 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
40728 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
40732 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
40734 enum machine_mode mode
= GET_MODE (op0
);
40735 rtx t1
, t2
, t3
, t4
, t5
, t6
;
40737 if (TARGET_XOP
&& mode
== V2DImode
)
40739 /* op1: A,B,C,D, op2: E,F,G,H */
40740 op1
= gen_lowpart (V4SImode
, op1
);
40741 op2
= gen_lowpart (V4SImode
, op2
);
40743 t1
= gen_reg_rtx (V4SImode
);
40744 t2
= gen_reg_rtx (V4SImode
);
40745 t3
= gen_reg_rtx (V2DImode
);
40746 t4
= gen_reg_rtx (V2DImode
);
40749 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
40755 /* t2: (B*E),(A*F),(D*G),(C*H) */
40756 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
40758 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
40759 emit_insn (gen_xop_phadddq (t3
, t2
));
40761 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
40762 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
40764 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
40765 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
40769 enum machine_mode nmode
;
40770 rtx (*umul
) (rtx
, rtx
, rtx
);
40772 if (mode
== V2DImode
)
40774 umul
= gen_vec_widen_umult_even_v4si
;
40777 else if (mode
== V4DImode
)
40779 umul
= gen_vec_widen_umult_even_v8si
;
40783 gcc_unreachable ();
40786 /* Multiply low parts. */
40787 t1
= gen_reg_rtx (mode
);
40788 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
40790 /* Shift input vectors right 32 bits so we can multiply high parts. */
40792 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
40793 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
40795 /* Multiply high parts by low parts. */
40796 t4
= gen_reg_rtx (mode
);
40797 t5
= gen_reg_rtx (mode
);
40798 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
40799 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
40801 /* Combine and shift the highparts back. */
40802 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
40803 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
40805 /* Combine high and low parts. */
40806 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
40809 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40810 gen_rtx_MULT (mode
, op1
, op2
));
40813 /* Expand an insert into a vector register through pinsr insn.
40814 Return true if successful. */
40817 ix86_expand_pinsr (rtx
*operands
)
40819 rtx dst
= operands
[0];
40820 rtx src
= operands
[3];
40822 unsigned int size
= INTVAL (operands
[1]);
40823 unsigned int pos
= INTVAL (operands
[2]);
40825 if (GET_CODE (dst
) == SUBREG
)
40827 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
40828 dst
= SUBREG_REG (dst
);
40831 if (GET_CODE (src
) == SUBREG
)
40832 src
= SUBREG_REG (src
);
40834 switch (GET_MODE (dst
))
40841 enum machine_mode srcmode
, dstmode
;
40842 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
40844 srcmode
= mode_for_size (size
, MODE_INT
, 0);
40849 if (!TARGET_SSE4_1
)
40851 dstmode
= V16QImode
;
40852 pinsr
= gen_sse4_1_pinsrb
;
40858 dstmode
= V8HImode
;
40859 pinsr
= gen_sse2_pinsrw
;
40863 if (!TARGET_SSE4_1
)
40865 dstmode
= V4SImode
;
40866 pinsr
= gen_sse4_1_pinsrd
;
40870 gcc_assert (TARGET_64BIT
);
40871 if (!TARGET_SSE4_1
)
40873 dstmode
= V2DImode
;
40874 pinsr
= gen_sse4_1_pinsrq
;
40881 dst
= gen_lowpart (dstmode
, dst
);
40882 src
= gen_lowpart (srcmode
, src
);
40886 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
40895 /* This function returns the calling abi specific va_list type node.
40896 It returns the FNDECL specific va_list type. */
40899 ix86_fn_abi_va_list (tree fndecl
)
40902 return va_list_type_node
;
40903 gcc_assert (fndecl
!= NULL_TREE
);
40905 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
40906 return ms_va_list_type_node
;
40908 return sysv_va_list_type_node
;
40911 /* Returns the canonical va_list type specified by TYPE. If there
40912 is no valid TYPE provided, it return NULL_TREE. */
40915 ix86_canonical_va_list_type (tree type
)
40919 /* Resolve references and pointers to va_list type. */
40920 if (TREE_CODE (type
) == MEM_REF
)
40921 type
= TREE_TYPE (type
);
40922 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
40923 type
= TREE_TYPE (type
);
40924 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
40925 type
= TREE_TYPE (type
);
40927 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
40929 wtype
= va_list_type_node
;
40930 gcc_assert (wtype
!= NULL_TREE
);
40932 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40934 /* If va_list is an array type, the argument may have decayed
40935 to a pointer type, e.g. by being passed to another function.
40936 In that case, unwrap both types so that we can compare the
40937 underlying records. */
40938 if (TREE_CODE (htype
) == ARRAY_TYPE
40939 || POINTER_TYPE_P (htype
))
40941 wtype
= TREE_TYPE (wtype
);
40942 htype
= TREE_TYPE (htype
);
40945 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40946 return va_list_type_node
;
40947 wtype
= sysv_va_list_type_node
;
40948 gcc_assert (wtype
!= NULL_TREE
);
40950 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40952 /* If va_list is an array type, the argument may have decayed
40953 to a pointer type, e.g. by being passed to another function.
40954 In that case, unwrap both types so that we can compare the
40955 underlying records. */
40956 if (TREE_CODE (htype
) == ARRAY_TYPE
40957 || POINTER_TYPE_P (htype
))
40959 wtype
= TREE_TYPE (wtype
);
40960 htype
= TREE_TYPE (htype
);
40963 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40964 return sysv_va_list_type_node
;
40965 wtype
= ms_va_list_type_node
;
40966 gcc_assert (wtype
!= NULL_TREE
);
40968 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40970 /* If va_list is an array type, the argument may have decayed
40971 to a pointer type, e.g. by being passed to another function.
40972 In that case, unwrap both types so that we can compare the
40973 underlying records. */
40974 if (TREE_CODE (htype
) == ARRAY_TYPE
40975 || POINTER_TYPE_P (htype
))
40977 wtype
= TREE_TYPE (wtype
);
40978 htype
= TREE_TYPE (htype
);
40981 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40982 return ms_va_list_type_node
;
40985 return std_canonical_va_list_type (type
);
40988 /* Iterate through the target-specific builtin types for va_list.
40989 IDX denotes the iterator, *PTREE is set to the result type of
40990 the va_list builtin, and *PNAME to its internal type.
40991 Returns zero if there is no element for this index, otherwise
40992 IDX should be increased upon the next call.
40993 Note, do not iterate a base builtin's name like __builtin_va_list.
40994 Used from c_common_nodes_and_builtins. */
40997 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41007 *ptree
= ms_va_list_type_node
;
41008 *pname
= "__builtin_ms_va_list";
41012 *ptree
= sysv_va_list_type_node
;
41013 *pname
= "__builtin_sysv_va_list";
41021 #undef TARGET_SCHED_DISPATCH
41022 #define TARGET_SCHED_DISPATCH has_dispatch
41023 #undef TARGET_SCHED_DISPATCH_DO
41024 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41025 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41026 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41027 #undef TARGET_SCHED_REORDER
41028 #define TARGET_SCHED_REORDER ix86_sched_reorder
41029 #undef TARGET_SCHED_ADJUST_PRIORITY
41030 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41031 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41032 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
41034 /* The size of the dispatch window is the total number of bytes of
41035 object code allowed in a window. */
41036 #define DISPATCH_WINDOW_SIZE 16
41038 /* Number of dispatch windows considered for scheduling. */
41039 #define MAX_DISPATCH_WINDOWS 3
41041 /* Maximum number of instructions in a window. */
41044 /* Maximum number of immediate operands in a window. */
41047 /* Maximum number of immediate bits allowed in a window. */
41048 #define MAX_IMM_SIZE 128
41050 /* Maximum number of 32 bit immediates allowed in a window. */
41051 #define MAX_IMM_32 4
41053 /* Maximum number of 64 bit immediates allowed in a window. */
41054 #define MAX_IMM_64 2
41056 /* Maximum total of loads or prefetches allowed in a window. */
41059 /* Maximum total of stores allowed in a window. */
41060 #define MAX_STORE 1
41066 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
41067 enum dispatch_group
{
41082 /* Number of allowable groups in a dispatch window. It is an array
41083 indexed by dispatch_group enum. 100 is used as a big number,
41084 because the number of these kind of operations does not have any
41085 effect in dispatch window, but we need them for other reasons in
41087 static unsigned int num_allowable_groups
[disp_last
] = {
41088 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41091 char group_name
[disp_last
+ 1][16] = {
41092 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41093 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41094 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41097 /* Instruction path. */
41100 path_single
, /* Single micro op. */
41101 path_double
, /* Double micro op. */
41102 path_multi
, /* Instructions with more than 2 micro op.. */
41106 /* sched_insn_info defines a window to the instructions scheduled in
41107 the basic block. It contains a pointer to the insn_info table and
41108 the instruction scheduled.
41110 Windows are allocated for each basic block and are linked
41112 typedef struct sched_insn_info_s
{
41114 enum dispatch_group group
;
41115 enum insn_path path
;
41120 /* Linked list of dispatch windows. This is a two way list of
41121 dispatch windows of a basic block. It contains information about
41122 the number of uops in the window and the total number of
41123 instructions and of bytes in the object code for this dispatch
41125 typedef struct dispatch_windows_s
{
41126 int num_insn
; /* Number of insn in the window. */
41127 int num_uops
; /* Number of uops in the window. */
41128 int window_size
; /* Number of bytes in the window. */
41129 int window_num
; /* Window number between 0 or 1. */
41130 int num_imm
; /* Number of immediates in an insn. */
41131 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41132 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41133 int imm_size
; /* Total immediates in the window. */
41134 int num_loads
; /* Total memory loads in the window. */
41135 int num_stores
; /* Total memory stores in the window. */
41136 int violation
; /* Violation exists in window. */
41137 sched_insn_info
*window
; /* Pointer to the window. */
41138 struct dispatch_windows_s
*next
;
41139 struct dispatch_windows_s
*prev
;
41140 } dispatch_windows
;
41142 /* Immediate valuse used in an insn. */
41143 typedef struct imm_info_s
41150 static dispatch_windows
*dispatch_window_list
;
41151 static dispatch_windows
*dispatch_window_list1
;
41153 /* Get dispatch group of insn. */
41155 static enum dispatch_group
41156 get_mem_group (rtx insn
)
41158 enum attr_memory memory
;
41160 if (INSN_CODE (insn
) < 0)
41161 return disp_no_group
;
41162 memory
= get_attr_memory (insn
);
41163 if (memory
== MEMORY_STORE
)
41166 if (memory
== MEMORY_LOAD
)
41169 if (memory
== MEMORY_BOTH
)
41170 return disp_load_store
;
41172 return disp_no_group
;
41175 /* Return true if insn is a compare instruction. */
41180 enum attr_type type
;
41182 type
= get_attr_type (insn
);
41183 return (type
== TYPE_TEST
41184 || type
== TYPE_ICMP
41185 || type
== TYPE_FCMP
41186 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41189 /* Return true if a dispatch violation encountered. */
41192 dispatch_violation (void)
41194 if (dispatch_window_list
->next
)
41195 return dispatch_window_list
->next
->violation
;
41196 return dispatch_window_list
->violation
;
41199 /* Return true if insn is a branch instruction. */
41202 is_branch (rtx insn
)
41204 return (CALL_P (insn
) || JUMP_P (insn
));
41207 /* Return true if insn is a prefetch instruction. */
41210 is_prefetch (rtx insn
)
41212 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
41215 /* This function initializes a dispatch window and the list container holding a
41216 pointer to the window. */
41219 init_window (int window_num
)
41222 dispatch_windows
*new_list
;
41224 if (window_num
== 0)
41225 new_list
= dispatch_window_list
;
41227 new_list
= dispatch_window_list1
;
41229 new_list
->num_insn
= 0;
41230 new_list
->num_uops
= 0;
41231 new_list
->window_size
= 0;
41232 new_list
->next
= NULL
;
41233 new_list
->prev
= NULL
;
41234 new_list
->window_num
= window_num
;
41235 new_list
->num_imm
= 0;
41236 new_list
->num_imm_32
= 0;
41237 new_list
->num_imm_64
= 0;
41238 new_list
->imm_size
= 0;
41239 new_list
->num_loads
= 0;
41240 new_list
->num_stores
= 0;
41241 new_list
->violation
= false;
41243 for (i
= 0; i
< MAX_INSN
; i
++)
41245 new_list
->window
[i
].insn
= NULL
;
41246 new_list
->window
[i
].group
= disp_no_group
;
41247 new_list
->window
[i
].path
= no_path
;
41248 new_list
->window
[i
].byte_len
= 0;
41249 new_list
->window
[i
].imm_bytes
= 0;
41254 /* This function allocates and initializes a dispatch window and the
41255 list container holding a pointer to the window. */
41257 static dispatch_windows
*
41258 allocate_window (void)
41260 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
41261 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
41266 /* This routine initializes the dispatch scheduling information. It
41267 initiates building dispatch scheduler tables and constructs the
41268 first dispatch window. */
41271 init_dispatch_sched (void)
41273 /* Allocate a dispatch list and a window. */
41274 dispatch_window_list
= allocate_window ();
41275 dispatch_window_list1
= allocate_window ();
41280 /* This function returns true if a branch is detected. End of a basic block
41281 does not have to be a branch, but here we assume only branches end a
41285 is_end_basic_block (enum dispatch_group group
)
41287 return group
== disp_branch
;
41290 /* This function is called when the end of a window processing is reached. */
41293 process_end_window (void)
41295 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
41296 if (dispatch_window_list
->next
)
41298 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
41299 gcc_assert (dispatch_window_list
->window_size
41300 + dispatch_window_list1
->window_size
<= 48);
41306 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41307 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41308 for 48 bytes of instructions. Note that these windows are not dispatch
41309 windows that their sizes are DISPATCH_WINDOW_SIZE. */
41311 static dispatch_windows
*
41312 allocate_next_window (int window_num
)
41314 if (window_num
== 0)
41316 if (dispatch_window_list
->next
)
41319 return dispatch_window_list
;
41322 dispatch_window_list
->next
= dispatch_window_list1
;
41323 dispatch_window_list1
->prev
= dispatch_window_list
;
41325 return dispatch_window_list1
;
41328 /* Increment the number of immediate operands of an instruction. */
41331 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
41336 switch ( GET_CODE (*in_rtx
))
41341 (imm_values
->imm
)++;
41342 if (x86_64_immediate_operand (*in_rtx
, SImode
))
41343 (imm_values
->imm32
)++;
41345 (imm_values
->imm64
)++;
41349 (imm_values
->imm
)++;
41350 (imm_values
->imm64
)++;
41354 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
41356 (imm_values
->imm
)++;
41357 (imm_values
->imm32
)++;
41368 /* Compute number of immediate operands of an instruction. */
41371 find_constant (rtx in_rtx
, imm_info
*imm_values
)
41373 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
41374 (rtx_function
) find_constant_1
, (void *) imm_values
);
41377 /* Return total size of immediate operands of an instruction along with number
41378 of corresponding immediate-operands. It initializes its parameters to zero
41379 befor calling FIND_CONSTANT.
41380 INSN is the input instruction. IMM is the total of immediates.
41381 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41385 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
41387 imm_info imm_values
= {0, 0, 0};
41389 find_constant (insn
, &imm_values
);
41390 *imm
= imm_values
.imm
;
41391 *imm32
= imm_values
.imm32
;
41392 *imm64
= imm_values
.imm64
;
41393 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
41396 /* This function indicates if an operand of an instruction is an
41400 has_immediate (rtx insn
)
41402 int num_imm_operand
;
41403 int num_imm32_operand
;
41404 int num_imm64_operand
;
41407 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41408 &num_imm64_operand
);
41412 /* Return single or double path for instructions. */
41414 static enum insn_path
41415 get_insn_path (rtx insn
)
41417 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
41419 if ((int)path
== 0)
41420 return path_single
;
41422 if ((int)path
== 1)
41423 return path_double
;
41428 /* Return insn dispatch group. */
41430 static enum dispatch_group
41431 get_insn_group (rtx insn
)
41433 enum dispatch_group group
= get_mem_group (insn
);
41437 if (is_branch (insn
))
41438 return disp_branch
;
41443 if (has_immediate (insn
))
41446 if (is_prefetch (insn
))
41447 return disp_prefetch
;
41449 return disp_no_group
;
41452 /* Count number of GROUP restricted instructions in a dispatch
41453 window WINDOW_LIST. */
41456 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
41458 enum dispatch_group group
= get_insn_group (insn
);
41460 int num_imm_operand
;
41461 int num_imm32_operand
;
41462 int num_imm64_operand
;
41464 if (group
== disp_no_group
)
41467 if (group
== disp_imm
)
41469 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41470 &num_imm64_operand
);
41471 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
41472 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
41473 || (num_imm32_operand
> 0
41474 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
41475 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
41476 || (num_imm64_operand
> 0
41477 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
41478 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
41479 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
41480 && num_imm64_operand
> 0
41481 && ((window_list
->num_imm_64
> 0
41482 && window_list
->num_insn
>= 2)
41483 || window_list
->num_insn
>= 3)))
41489 if ((group
== disp_load_store
41490 && (window_list
->num_loads
>= MAX_LOAD
41491 || window_list
->num_stores
>= MAX_STORE
))
41492 || ((group
== disp_load
41493 || group
== disp_prefetch
)
41494 && window_list
->num_loads
>= MAX_LOAD
)
41495 || (group
== disp_store
41496 && window_list
->num_stores
>= MAX_STORE
))
41502 /* This function returns true if insn satisfies dispatch rules on the
41503 last window scheduled. */
41506 fits_dispatch_window (rtx insn
)
41508 dispatch_windows
*window_list
= dispatch_window_list
;
41509 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
41510 unsigned int num_restrict
;
41511 enum dispatch_group group
= get_insn_group (insn
);
41512 enum insn_path path
= get_insn_path (insn
);
41515 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
41516 instructions should be given the lowest priority in the
41517 scheduling process in Haifa scheduler to make sure they will be
41518 scheduled in the same dispatch window as the reference to them. */
41519 if (group
== disp_jcc
|| group
== disp_cmp
)
41522 /* Check nonrestricted. */
41523 if (group
== disp_no_group
|| group
== disp_branch
)
41526 /* Get last dispatch window. */
41527 if (window_list_next
)
41528 window_list
= window_list_next
;
41530 if (window_list
->window_num
== 1)
41532 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
41535 || (min_insn_size (insn
) + sum
) >= 48)
41536 /* Window 1 is full. Go for next window. */
41540 num_restrict
= count_num_restricted (insn
, window_list
);
41542 if (num_restrict
> num_allowable_groups
[group
])
41545 /* See if it fits in the first window. */
41546 if (window_list
->window_num
== 0)
41548 /* The first widow should have only single and double path
41550 if (path
== path_double
41551 && (window_list
->num_uops
+ 2) > MAX_INSN
)
41553 else if (path
!= path_single
)
41559 /* Add an instruction INSN with NUM_UOPS micro-operations to the
41560 dispatch window WINDOW_LIST. */
41563 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
41565 int byte_len
= min_insn_size (insn
);
41566 int num_insn
= window_list
->num_insn
;
41568 sched_insn_info
*window
= window_list
->window
;
41569 enum dispatch_group group
= get_insn_group (insn
);
41570 enum insn_path path
= get_insn_path (insn
);
41571 int num_imm_operand
;
41572 int num_imm32_operand
;
41573 int num_imm64_operand
;
41575 if (!window_list
->violation
&& group
!= disp_cmp
41576 && !fits_dispatch_window (insn
))
41577 window_list
->violation
= true;
41579 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41580 &num_imm64_operand
);
41582 /* Initialize window with new instruction. */
41583 window
[num_insn
].insn
= insn
;
41584 window
[num_insn
].byte_len
= byte_len
;
41585 window
[num_insn
].group
= group
;
41586 window
[num_insn
].path
= path
;
41587 window
[num_insn
].imm_bytes
= imm_size
;
41589 window_list
->window_size
+= byte_len
;
41590 window_list
->num_insn
= num_insn
+ 1;
41591 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
41592 window_list
->imm_size
+= imm_size
;
41593 window_list
->num_imm
+= num_imm_operand
;
41594 window_list
->num_imm_32
+= num_imm32_operand
;
41595 window_list
->num_imm_64
+= num_imm64_operand
;
41597 if (group
== disp_store
)
41598 window_list
->num_stores
+= 1;
41599 else if (group
== disp_load
41600 || group
== disp_prefetch
)
41601 window_list
->num_loads
+= 1;
41602 else if (group
== disp_load_store
)
41604 window_list
->num_stores
+= 1;
41605 window_list
->num_loads
+= 1;
41609 /* Adds a scheduled instruction, INSN, to the current dispatch window.
41610 If the total bytes of instructions or the number of instructions in
41611 the window exceed allowable, it allocates a new window. */
41614 add_to_dispatch_window (rtx insn
)
41617 dispatch_windows
*window_list
;
41618 dispatch_windows
*next_list
;
41619 dispatch_windows
*window0_list
;
41620 enum insn_path path
;
41621 enum dispatch_group insn_group
;
41629 if (INSN_CODE (insn
) < 0)
41632 byte_len
= min_insn_size (insn
);
41633 window_list
= dispatch_window_list
;
41634 next_list
= window_list
->next
;
41635 path
= get_insn_path (insn
);
41636 insn_group
= get_insn_group (insn
);
41638 /* Get the last dispatch window. */
41640 window_list
= dispatch_window_list
->next
;
41642 if (path
== path_single
)
41644 else if (path
== path_double
)
41647 insn_num_uops
= (int) path
;
41649 /* If current window is full, get a new window.
41650 Window number zero is full, if MAX_INSN uops are scheduled in it.
41651 Window number one is full, if window zero's bytes plus window
41652 one's bytes is 32, or if the bytes of the new instruction added
41653 to the total makes it greater than 48, or it has already MAX_INSN
41654 instructions in it. */
41655 num_insn
= window_list
->num_insn
;
41656 num_uops
= window_list
->num_uops
;
41657 window_num
= window_list
->window_num
;
41658 insn_fits
= fits_dispatch_window (insn
);
41660 if (num_insn
>= MAX_INSN
41661 || num_uops
+ insn_num_uops
> MAX_INSN
41664 window_num
= ~window_num
& 1;
41665 window_list
= allocate_next_window (window_num
);
41668 if (window_num
== 0)
41670 add_insn_window (insn
, window_list
, insn_num_uops
);
41671 if (window_list
->num_insn
>= MAX_INSN
41672 && insn_group
== disp_branch
)
41674 process_end_window ();
41678 else if (window_num
== 1)
41680 window0_list
= window_list
->prev
;
41681 sum
= window0_list
->window_size
+ window_list
->window_size
;
41683 || (byte_len
+ sum
) >= 48)
41685 process_end_window ();
41686 window_list
= dispatch_window_list
;
41689 add_insn_window (insn
, window_list
, insn_num_uops
);
41692 gcc_unreachable ();
41694 if (is_end_basic_block (insn_group
))
41696 /* End of basic block is reached do end-basic-block process. */
41697 process_end_window ();
41702 /* Print the dispatch window, WINDOW_NUM, to FILE. */
41704 DEBUG_FUNCTION
static void
41705 debug_dispatch_window_file (FILE *file
, int window_num
)
41707 dispatch_windows
*list
;
41710 if (window_num
== 0)
41711 list
= dispatch_window_list
;
41713 list
= dispatch_window_list1
;
41715 fprintf (file
, "Window #%d:\n", list
->window_num
);
41716 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
41717 list
->num_insn
, list
->num_uops
, list
->window_size
);
41718 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41719 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
41721 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
41723 fprintf (file
, " insn info:\n");
41725 for (i
= 0; i
< MAX_INSN
; i
++)
41727 if (!list
->window
[i
].insn
)
41729 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
41730 i
, group_name
[list
->window
[i
].group
],
41731 i
, (void *)list
->window
[i
].insn
,
41732 i
, list
->window
[i
].path
,
41733 i
, list
->window
[i
].byte_len
,
41734 i
, list
->window
[i
].imm_bytes
);
41738 /* Print to stdout a dispatch window. */
41740 DEBUG_FUNCTION
void
41741 debug_dispatch_window (int window_num
)
41743 debug_dispatch_window_file (stdout
, window_num
);
41746 /* Print INSN dispatch information to FILE. */
41748 DEBUG_FUNCTION
static void
41749 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
41752 enum insn_path path
;
41753 enum dispatch_group group
;
41755 int num_imm_operand
;
41756 int num_imm32_operand
;
41757 int num_imm64_operand
;
41759 if (INSN_CODE (insn
) < 0)
41762 byte_len
= min_insn_size (insn
);
41763 path
= get_insn_path (insn
);
41764 group
= get_insn_group (insn
);
41765 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41766 &num_imm64_operand
);
41768 fprintf (file
, " insn info:\n");
41769 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
41770 group_name
[group
], path
, byte_len
);
41771 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41772 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
41775 /* Print to STDERR the status of the ready list with respect to
41776 dispatch windows. */
41778 DEBUG_FUNCTION
void
41779 debug_ready_dispatch (void)
41782 int no_ready
= number_in_ready ();
41784 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
41786 for (i
= 0; i
< no_ready
; i
++)
41787 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
41790 /* This routine is the driver of the dispatch scheduler. */
41793 do_dispatch (rtx insn
, int mode
)
41795 if (mode
== DISPATCH_INIT
)
41796 init_dispatch_sched ();
41797 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
41798 add_to_dispatch_window (insn
);
41801 /* Return TRUE if Dispatch Scheduling is supported. */
41804 has_dispatch (rtx insn
, int action
)
41806 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
41807 && flag_dispatch_scheduler
)
41813 case IS_DISPATCH_ON
:
41818 return is_cmp (insn
);
41820 case DISPATCH_VIOLATION
:
41821 return dispatch_violation ();
41823 case FITS_DISPATCH_WINDOW
:
41824 return fits_dispatch_window (insn
);
41830 /* Implementation of reassociation_width target hook used by
41831 reassoc phase to identify parallelism level in reassociated
41832 tree. Statements tree_code is passed in OPC. Arguments type
41835 Currently parallel reassociation is enabled for Atom
41836 processors only and we set reassociation width to be 2
41837 because Atom may issue up to 2 instructions per cycle.
41839 Return value should be fixed if parallel reassociation is
41840 enabled for other processors. */
41843 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
41844 enum machine_mode mode
)
41848 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
41850 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
41856 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
41857 place emms and femms instructions. */
41859 static enum machine_mode
41860 ix86_preferred_simd_mode (enum machine_mode mode
)
41868 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
41870 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
41872 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
41874 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
41877 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41883 if (!TARGET_VECTORIZE_DOUBLE
)
41885 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41887 else if (TARGET_SSE2
)
41896 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
41899 static unsigned int
41900 ix86_autovectorize_vector_sizes (void)
41902 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
41907 /* Return class of registers which could be used for pseudo of MODE
41908 and of class RCLASS for spilling instead of memory. Return NO_REGS
41909 if it is not possible or non-profitable. */
41911 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
41913 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
41914 && hard_reg_set_subset_p (reg_class_contents
[rclass
],
41915 reg_class_contents
[GENERAL_REGS
])
41916 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
)))
41921 /* Implement targetm.vectorize.init_cost. */
41924 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
41926 unsigned *cost
= XNEWVEC (unsigned, 3);
41927 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
41931 /* Implement targetm.vectorize.add_stmt_cost. */
41934 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
41935 struct _stmt_vec_info
*stmt_info
, int misalign
,
41936 enum vect_cost_model_location where
)
41938 unsigned *cost
= (unsigned *) data
;
41939 unsigned retval
= 0;
41941 if (flag_vect_cost_model
)
41943 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
41944 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
41946 /* Statements in an inner loop relative to the loop being
41947 vectorized are weighted more heavily. The value here is
41948 arbitrary and could potentially be improved with analysis. */
41949 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
41950 count
*= 50; /* FIXME. */
41952 retval
= (unsigned) (count
* stmt_cost
);
41953 cost
[where
] += retval
;
41959 /* Implement targetm.vectorize.finish_cost. */
41962 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
41963 unsigned *body_cost
, unsigned *epilogue_cost
)
41965 unsigned *cost
= (unsigned *) data
;
41966 *prologue_cost
= cost
[vect_prologue
];
41967 *body_cost
= cost
[vect_body
];
41968 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
41979 /* Validate target specific memory model bits in VAL. */
41981 static unsigned HOST_WIDE_INT
41982 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
41984 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
41985 unsigned HOST_WIDE_INT strong
;
41987 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
41989 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
41991 warning (OPT_Winvalid_memory_model
,
41992 "Unknown architecture specific memory model");
41993 return MEMMODEL_SEQ_CST
;
41995 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
41996 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
41998 warning (OPT_Winvalid_memory_model
,
41999 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42000 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42002 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42004 warning (OPT_Winvalid_memory_model
,
42005 "HLE_RELEASE not used with RELEASE or stronger memory model");
42006 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42011 /* Initialize the GCC target structure. */
42012 #undef TARGET_RETURN_IN_MEMORY
42013 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42015 #undef TARGET_LEGITIMIZE_ADDRESS
42016 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42018 #undef TARGET_ATTRIBUTE_TABLE
42019 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42020 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42021 # undef TARGET_MERGE_DECL_ATTRIBUTES
42022 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42025 #undef TARGET_COMP_TYPE_ATTRIBUTES
42026 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42028 #undef TARGET_INIT_BUILTINS
42029 #define TARGET_INIT_BUILTINS ix86_init_builtins
42030 #undef TARGET_BUILTIN_DECL
42031 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42032 #undef TARGET_EXPAND_BUILTIN
42033 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42035 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42036 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42037 ix86_builtin_vectorized_function
42039 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42040 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42042 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42043 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42045 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42046 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42048 #undef TARGET_BUILTIN_RECIPROCAL
42049 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42051 #undef TARGET_ASM_FUNCTION_EPILOGUE
42052 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42054 #undef TARGET_ENCODE_SECTION_INFO
42055 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42056 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42058 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42061 #undef TARGET_ASM_OPEN_PAREN
42062 #define TARGET_ASM_OPEN_PAREN ""
42063 #undef TARGET_ASM_CLOSE_PAREN
42064 #define TARGET_ASM_CLOSE_PAREN ""
42066 #undef TARGET_ASM_BYTE_OP
42067 #define TARGET_ASM_BYTE_OP ASM_BYTE
42069 #undef TARGET_ASM_ALIGNED_HI_OP
42070 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42071 #undef TARGET_ASM_ALIGNED_SI_OP
42072 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42074 #undef TARGET_ASM_ALIGNED_DI_OP
42075 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42078 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42079 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42081 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42082 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42084 #undef TARGET_ASM_UNALIGNED_HI_OP
42085 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42086 #undef TARGET_ASM_UNALIGNED_SI_OP
42087 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42088 #undef TARGET_ASM_UNALIGNED_DI_OP
42089 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42091 #undef TARGET_PRINT_OPERAND
42092 #define TARGET_PRINT_OPERAND ix86_print_operand
42093 #undef TARGET_PRINT_OPERAND_ADDRESS
42094 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42095 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42096 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42097 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42098 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42100 #undef TARGET_SCHED_INIT_GLOBAL
42101 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42102 #undef TARGET_SCHED_ADJUST_COST
42103 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42104 #undef TARGET_SCHED_ISSUE_RATE
42105 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42106 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42107 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42108 ia32_multipass_dfa_lookahead
42110 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42111 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42113 #undef TARGET_MEMMODEL_CHECK
42114 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42117 #undef TARGET_HAVE_TLS
42118 #define TARGET_HAVE_TLS true
42120 #undef TARGET_CANNOT_FORCE_CONST_MEM
42121 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42122 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42123 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42125 #undef TARGET_DELEGITIMIZE_ADDRESS
42126 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42128 #undef TARGET_MS_BITFIELD_LAYOUT_P
42129 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42132 #undef TARGET_BINDS_LOCAL_P
42133 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42136 #undef TARGET_BINDS_LOCAL_P
42137 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42140 #undef TARGET_ASM_OUTPUT_MI_THUNK
42141 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42142 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42143 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42145 #undef TARGET_ASM_FILE_START
42146 #define TARGET_ASM_FILE_START x86_file_start
42148 #undef TARGET_OPTION_OVERRIDE
42149 #define TARGET_OPTION_OVERRIDE ix86_option_override
42151 #undef TARGET_REGISTER_MOVE_COST
42152 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42153 #undef TARGET_MEMORY_MOVE_COST
42154 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42155 #undef TARGET_RTX_COSTS
42156 #define TARGET_RTX_COSTS ix86_rtx_costs
42157 #undef TARGET_ADDRESS_COST
42158 #define TARGET_ADDRESS_COST ix86_address_cost
42160 #undef TARGET_FIXED_CONDITION_CODE_REGS
42161 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42162 #undef TARGET_CC_MODES_COMPATIBLE
42163 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42165 #undef TARGET_MACHINE_DEPENDENT_REORG
42166 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42168 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42169 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42171 #undef TARGET_BUILD_BUILTIN_VA_LIST
42172 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42174 #undef TARGET_FOLD_BUILTIN
42175 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42177 #undef TARGET_COMPARE_VERSION_PRIORITY
42178 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42180 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42181 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42182 ix86_generate_version_dispatcher_body
42184 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42185 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42186 ix86_get_function_versions_dispatcher
42188 #undef TARGET_ENUM_VA_LIST_P
42189 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42191 #undef TARGET_FN_ABI_VA_LIST
42192 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42194 #undef TARGET_CANONICAL_VA_LIST_TYPE
42195 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42197 #undef TARGET_EXPAND_BUILTIN_VA_START
42198 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42200 #undef TARGET_MD_ASM_CLOBBERS
42201 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42203 #undef TARGET_PROMOTE_PROTOTYPES
42204 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42205 #undef TARGET_STRUCT_VALUE_RTX
42206 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42207 #undef TARGET_SETUP_INCOMING_VARARGS
42208 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42209 #undef TARGET_MUST_PASS_IN_STACK
42210 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42211 #undef TARGET_FUNCTION_ARG_ADVANCE
42212 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42213 #undef TARGET_FUNCTION_ARG
42214 #define TARGET_FUNCTION_ARG ix86_function_arg
42215 #undef TARGET_FUNCTION_ARG_BOUNDARY
42216 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42217 #undef TARGET_PASS_BY_REFERENCE
42218 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42219 #undef TARGET_INTERNAL_ARG_POINTER
42220 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42221 #undef TARGET_UPDATE_STACK_BOUNDARY
42222 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42223 #undef TARGET_GET_DRAP_RTX
42224 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42225 #undef TARGET_STRICT_ARGUMENT_NAMING
42226 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42227 #undef TARGET_STATIC_CHAIN
42228 #define TARGET_STATIC_CHAIN ix86_static_chain
42229 #undef TARGET_TRAMPOLINE_INIT
42230 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42231 #undef TARGET_RETURN_POPS_ARGS
42232 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42234 #undef TARGET_LEGITIMATE_COMBINED_INSN
42235 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42237 #undef TARGET_ASAN_SHADOW_OFFSET
42238 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42240 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42241 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42243 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42244 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42246 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42247 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42249 #undef TARGET_C_MODE_FOR_SUFFIX
42250 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42253 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42254 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42257 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42258 #undef TARGET_INSERT_ATTRIBUTES
42259 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42262 #undef TARGET_MANGLE_TYPE
42263 #define TARGET_MANGLE_TYPE ix86_mangle_type
42266 #undef TARGET_STACK_PROTECT_FAIL
42267 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42270 #undef TARGET_FUNCTION_VALUE
42271 #define TARGET_FUNCTION_VALUE ix86_function_value
42273 #undef TARGET_FUNCTION_VALUE_REGNO_P
42274 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42276 #undef TARGET_PROMOTE_FUNCTION_MODE
42277 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42279 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42280 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42282 #undef TARGET_INSTANTIATE_DECLS
42283 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42285 #undef TARGET_SECONDARY_RELOAD
42286 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42288 #undef TARGET_CLASS_MAX_NREGS
42289 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42291 #undef TARGET_PREFERRED_RELOAD_CLASS
42292 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42293 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42294 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42295 #undef TARGET_CLASS_LIKELY_SPILLED_P
42296 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42298 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42299 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42300 ix86_builtin_vectorization_cost
42301 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42302 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42303 ix86_vectorize_vec_perm_const_ok
42304 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42305 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42306 ix86_preferred_simd_mode
42307 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42308 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42309 ix86_autovectorize_vector_sizes
42310 #undef TARGET_VECTORIZE_INIT_COST
42311 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42312 #undef TARGET_VECTORIZE_ADD_STMT_COST
42313 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42314 #undef TARGET_VECTORIZE_FINISH_COST
42315 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42316 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42317 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42319 #undef TARGET_SET_CURRENT_FUNCTION
42320 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42322 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42323 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42325 #undef TARGET_OPTION_SAVE
42326 #define TARGET_OPTION_SAVE ix86_function_specific_save
42328 #undef TARGET_OPTION_RESTORE
42329 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42331 #undef TARGET_OPTION_PRINT
42332 #define TARGET_OPTION_PRINT ix86_function_specific_print
42334 #undef TARGET_OPTION_FUNCTION_VERSIONS
42335 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42337 #undef TARGET_CAN_INLINE_P
42338 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42340 #undef TARGET_EXPAND_TO_RTL_HOOK
42341 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42343 #undef TARGET_LEGITIMATE_ADDRESS_P
42344 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42346 #undef TARGET_LRA_P
42347 #define TARGET_LRA_P hook_bool_void_true
42349 #undef TARGET_REGISTER_PRIORITY
42350 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42352 #undef TARGET_LEGITIMATE_CONSTANT_P
42353 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42355 #undef TARGET_FRAME_POINTER_REQUIRED
42356 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42358 #undef TARGET_CAN_ELIMINATE
42359 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42361 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42362 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42364 #undef TARGET_ASM_CODE_END
42365 #define TARGET_ASM_CODE_END ix86_code_end
42367 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42368 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42371 #undef TARGET_INIT_LIBFUNCS
42372 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42375 #undef TARGET_SPILL_CLASS
42376 #define TARGET_SPILL_CLASS ix86_spill_class
42378 struct gcc_target targetm
= TARGET_INITIALIZER
;
42380 #include "gt-i386.h"