/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "pass_manager.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
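
/* Illustrative sketch, not part of the original file: MODE_INDEX picks a
   slot in the five-entry {QI, HI, SI, DI, other} arrays of the cost tables
   below.  The mult_init field name is assumed from the processor_costs
   layout in i386.h.  */
static inline int
example_mult_init_cost (const struct processor_costs *costs,
			enum machine_mode mode)
{
  /* E.g. MODE_INDEX (SImode) == 2 selects the SI entry.  */
  return costs->mult_init[MODE_INDEX (mode)];
}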
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
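
/* For instance, with COSTS_N_INSNS (N) defined as (N) * 4, a two-byte add
   costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based and
   insn-based cost scales agree on the baseline add.  */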
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
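
/* Each of the memcpy/memset tables below has two entries: index 0 is used
   when tuning 32-bit code and index 1 when tuning 64-bit code.
   DUMMY_STRINGOP_ALGS fills the 64-bit slot of processors that are only
   tuned in 32-bit mode, falling back to a libcall for any block size.  */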
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks the inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
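
/* Minimal sketch, illustrative only and simplified from the real selection
   logic later in this file: how a stringop_algs table such as the ones
   above is consulted.  Entries are scanned in order; the first whose max
   covers the block size (max == -1 meaning no upper bound) supplies the
   algorithm.  Assumes the stringop_algs layout from i386.h.  */
static inline enum stringop_alg
example_choose_stringop (const stringop_algs *algs, HOST_WIDE_INT count)
{
  size_t i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;
  /* Size not covered by any entry; treat it like an unknown size.  */
  return algs->unknown_size;
}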
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* For some reason, Athlon deals better with the REP prefix (relative to
   loops) than K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* K8 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks a loop is better.  For large blocks, a libcall can do
   nontemporal accesses and beat inline code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  k8_memcpy,
  k8_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
   for very small blocks a loop is better.  For large blocks, a libcall can
   do nontemporal accesses and beat inline code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  amdfam10_memcpy,
  amdfam10_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks a loop is better.  For large blocks, a libcall can do
   nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */
  bdver1_memcpy,
  bdver1_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks a loop is better.  For large blocks, a libcall can do
   nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */
  bdver2_memcpy,
  bdver2_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks a loop is better.  For large blocks, a libcall can do
   nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */
  bdver3_memcpy,
  bdver3_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks a loop is better.  For large blocks, a libcall can do
   nontemporal accesses and beat inline code considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  btver1_memcpy,
  btver1_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
					    MOVD reg32, xmmreg Double FADD 3 */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  btver2_memcpy,
  btver2_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  pentium4_memcpy,
  pentium4_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};

static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  nocona_memcpy,
  nocona_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  atom_memcpy,
  atom_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs slm_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  slm_memcpy,
  slm_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic should produce code tuned for Core-i7 (and newer chips)
   and btver1 (and newer chips).  */

static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static const
struct processor_costs generic_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  generic_memcpy,
  generic_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* core_cost should produce code tuned for Core family of CPUs.  */

static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
             {8192, rep_prefix_4_byte, true},
             {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
static const
struct processor_costs core_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* FIXME perhaps more appropriate value is 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  core_memcpy,
  core_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_COREI7_AVX (1<<PROCESSOR_COREI7_AVX)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_COREI7_AVX | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
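/* A minimal illustration (hypothetical helper, not part of GCC): each
   processor owns one bit, so the m_* masks above compose with | and are
   tested with &.  */
static inline bool
example_mask_has_processor_p (unsigned int selector, enum processor_type p)
{
  /* E.g. a selector of m_CORE_ALL | m_GENERIC matches PROCESSOR_CORE2,
     because m_CORE2 == 1 << PROCESSOR_CORE2 is set in m_CORE_ALL.  */
  return (selector & (1u << (int) p)) != 0;
}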
const char *ix86_tune_feature_names[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};
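/* A sketch of how the DEF_TUNE x-macro keeps the two tables in sync
   (the entry below is hypothetical, not taken from x86-tune.def):

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   expands to "example", in ix86_tune_feature_names above and to
   m_CORE_ALL | m_GENERIC, in initial_ix86_tune_features here, so a
   single line in x86-tune.def updates both arrays.  */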
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};
1951 /* The "default" register map used in 32bit mode. */
1953 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1955 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1956 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1957 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1958 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1959 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1960 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1961 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1962 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
1963 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
1964 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
1965 101, 102, 103, 104, /* bound registers */
1968 /* The "default" register map used in 64bit mode. */
1970 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1972 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1973 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1974 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1975 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1976 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1977 8,9,10,11,12,13,14,15, /* extended integer registers */
1978 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1979 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
1980 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
1981 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
1982 126, 127, 128, 129, /* bound registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  -1, -1, -1, -1,			/* bound registers */
};
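/* Illustrative sketch (hypothetical helper, not part of this file):
   translating a gcc hard register number to its SVR4 DWARF number is a
   plain table lookup, e.g. %ecx is gcc regno 2 and the table maps it to
   DWARF regno 1, matching the list above.  */
static inline int
example_svr4_dwarf_regno (unsigned int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}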
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

int const x86_64_ms_sysv_extra_clobbered_registers[12] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward through the saved static chain (if
   ix86_static_chain_on_stack) and the saved frame pointer (if
   frame_pointer_needed, which marks HARD_FRAME_POINTER), followed by
   the register and SSE register save areas (the latter ending at
   sse_regs_save_offset), the va_arg registers, the frame proper and
   trailing padding, which together make up to_allocate.  */

struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&generic_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 16, 10, 16, 7, 11},
  {&bdver2_cost, 16, 10, 16, 7, 11},
  {&bdver3_cost, 16, 10, 16, 7, 11},
  {&btver1_cost, 16, 10, 16, 7, 11},
  {&btver2_cost, 16, 10, 16, 7, 11},
  {&atom_cost, 16, 15, 16, 7, 16},
  {&slm_cost, 16, 15, 16, 7, 16}
};
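/* Illustration: the tuning globals are later initialized from this
   table, roughly

     ix86_tune_cost = processor_target_table[ix86_tune].cost;

   with the align_* fields providing the default loop, jump and function
   alignments for the selected processor.  */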
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
static bool
gate_insert_vzeroupper (void)
{
  return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}

namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_insert_vzeroupper (); }
  unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }

}; // class pass_insert_vzeroupper

} // anon namespace

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
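/* For reference, the pass is scheduled right after reload, roughly like
   so (a sketch of the registration done later in this file):

     opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
     struct register_pass_info insert_vzeroupper_info
       = { pass_insert_vzeroupper, "reload", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_vzeroupper_info);  */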
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-mavx512f",	OPTION_MASK_ISA_AVX512F },
    { "-mavx512er",	OPTION_MASK_ISA_AVX512ER },
    { "-mavx512cd",	OPTION_MASK_ISA_AVX512CD },
    { "-mavx512pf",	OPTION_MASK_ISA_AVX512PF },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mhle",		OPTION_MASK_ISA_HLE },
    { "-mfxsr",		OPTION_MASK_ISA_FXSR },
    { "-mrdseed",	OPTION_MASK_ISA_RDSEED },
    { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
    { "-madx",		OPTION_MASK_ISA_ADX },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
    { "-mxsave",	OPTION_MASK_ISA_XSAVE },
    { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
    { "-mmpx",		OPTION_MASK_ISA_MPX },
  };
  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-64",		MASK_LONG_DOUBLE_64 },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
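/* Illustrative usage: ix86_debug_options below calls

     ix86_target_string (ix86_isa_flags, target_flags, ix86_arch_string,
			 ix86_tune_string, ix86_fpmath, true);

   and is responsible for freeing the returned buffer.  */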
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: for x86 the "hotfix" variant is not
   supported.  */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}

/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
static const char *stringop_alg_names[] = {
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ALG
};
/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
   The string is of the following form (or comma separated list of it):

     strategy_alg:max_size:[align|noalign]

   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], in which min_size is the max_size + 1 of the
   preceding range.  The last size range must have max_size == -1.

   Examples:

    -mmemcpy-strategy=libcall:-1:noalign

   this is equivalent to (for known size memcpy) -mstringop-strategy=libcall


    -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   This is to tell the compiler to use the following strategy for memset
   1) when the expected size is between [1, 16], use rep_8byte strategy;
   2) when the size is between [17, 2048], use vector_loop;
   3) when the size is > 2048, use libcall.  */
struct stringop_size_range
{
  int max;
  stringop_alg alg;
  bool noalign;
};

static void
ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
{
  const struct stringop_algs *default_algs;
  stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
  char *curr_range_str, *next_range_str;
  int i = 0, n = 0;

  if (is_memset)
    default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
  else
    default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

  curr_range_str = strategy_str;

  do
    {
      int maxs;
      stringop_alg alg;
      char alg_name[128];
      char align[16];

      next_range_str = strchr (curr_range_str, ',');
      if (next_range_str)
	*next_range_str++ = '\0';

      if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
		       alg_name, &maxs, align))
	{
	  error ("wrong arg %s to option %s", curr_range_str,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
	{
	  error ("size ranges of option %s should be increasing",
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      for (i = 0; i < last_alg; i++)
	if (!strcmp (alg_name, stringop_alg_names[i]))
	  {
	    alg = (stringop_alg) i;
	    break;
	  }

      if (i == last_alg)
	{
	  error ("wrong stringop strategy name %s specified for option %s",
		 alg_name,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      input_ranges[n].max = maxs;
      input_ranges[n].alg = alg;
      if (!strcmp (align, "align"))
	input_ranges[n].noalign = false;
      else if (!strcmp (align, "noalign"))
	input_ranges[n].noalign = true;
      else
	{
	  error ("unknown alignment %s specified for option %s",
		 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}
      n++;
      curr_range_str = next_range_str;
    }
  while (curr_range_str);

  if (input_ranges[n - 1].max != -1)
    {
      error ("the max value for the last size range should be -1"
	     " for option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  if (n > MAX_STRINGOP_ALGS)
    {
      error ("too many size ranges specified in option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  /* Now override the default algs array.  */
  for (i = 0; i < n; i++)
    {
      *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
      *const_cast<stringop_alg *>(&default_algs->size[i].alg)
	= input_ranges[i].alg;
      *const_cast<int *>(&default_algs->size[i].noalign)
	= input_ranges[i].noalign;
    }
}
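/* Worked example (hypothetical command line): with
     -mmemcpy-strategy=loop:2048:align,libcall:-1:noalign
   the loop above rewrites the first two entries of the active
   stringop_algs size array to {2048, loop, noalign=false} and
   {-1, libcall, noalign=true}; known-size copies up to 2048 bytes then
   use the loop expansion and anything larger calls the library.  */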
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */

static void
parse_mtune_ctrl_str (bool dump)
{
  if (!ix86_tune_ctrl_string)
    return;

  char *next_feature_string = NULL;
  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
  char *orig = curr_feature_string;
  int i;
  do
    {
      bool clear = false;

      next_feature_string = strchr (curr_feature_string, ',');
      if (next_feature_string)
	*next_feature_string++ = '\0';
      if (*curr_feature_string == '^')
	{
	  curr_feature_string++;
	  clear = true;
	}
      for (i = 0; i < X86_TUNE_LAST; i++)
	{
	  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
	    {
	      ix86_tune_features[i] = !clear;
	      if (dump)
		fprintf (stderr, "Explicitly %s feature %s\n",
			 clear ? "clear" : "set", ix86_tune_feature_names[i]);
	      break;
	    }
	}
      if (i == X86_TUNE_LAST)
	error ("Unknown parameter to option -mtune-ctrl: %s",
	       clear ? curr_feature_string - 1 : curr_feature_string);
      curr_feature_string = next_feature_string;
    }
  while (curr_feature_string);
  free (orig);
}
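/* Illustration (the names foo and bar stand in for entries of
   ix86_tune_feature_names): -mtune-ctrl=foo,^bar sets feature "foo" and,
   because of the leading '^', clears feature "bar".  */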
/* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor type.  */

static void
set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    {
      if (ix86_tune_no_default)
	ix86_tune_features[i] = 0;
      else
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }

  if (dump)
    {
      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
      for (i = 0; i < X86_TUNE_LAST; i++)
	fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
		 ix86_tune_features[i] ? "on" : "off");
    }

  parse_mtune_ctrl_str (dump);
}
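/* Illustration: for ix86_tune == PROCESSOR_K8 the mask is
   1u << PROCESSOR_K8, i.e. m_K8, so a feature whose selector in
   x86-tune.def is m_ATHLON_K8 | m_GENERIC tests non-zero and the
   feature defaults to on.  */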
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attribute (target).  */

static void
ix86_option_override_internal (bool main_args_p,
			       struct gcc_options *opts,
			       struct gcc_options *opts_set)
{
  int i;
  unsigned int ix86_arch_mask;
  const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;

#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
#define PTA_AVX512F		(HOST_WIDE_INT_1 << 40)
#define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
#define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
#define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
#define PTA_MPX			(HOST_WIDE_INT_1 << 44)

/* if this reaches 64, need to widen struct pta flags below */
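/* Illustration: each -march entry below simply ORs PTA_* bits together;
   later in this function every PTA bit found for the selected processor
   is translated into the corresponding OPTION_MASK_ISA_* flag unless the
   user set that ISA option explicitly.  */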
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE | PTA_FXSR},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
      {"core2", PROCESSOR_CORE2, CPU_CORE2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_CX16 | PTA_FXSR},
      {"corei7", PROCESSOR_COREI7, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
	| PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_FXSR},
      {"corei7-avx", PROCESSOR_COREI7_AVX, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL
	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
      {"core-avx-i", PROCESSOR_COREI7_AVX, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
	| PTA_RDRND | PTA_F16C | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
      {"core-avx2", PROCESSOR_HASWELL, CPU_COREI7,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
	| PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
	| PTA_XSAVEOPT},
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
      {"slm", PROCESSOR_SLM, CPU_SLM,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE
	| PTA_FXSR},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"x86-64", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"k8", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"opteron", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon64", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
	| PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
	| PTA_XSAVEOPT | PTA_FSGSBASE},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
	| PTA_FXSR | PTA_XSAVE},
      {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
	| PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},

      {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
	PTA_64BIT
	| PTA_HLE /* flags are only used for -march switch.  */ },
    };
  /* -mrecip options.  */
  static struct
    {
      const char *string;	/* option name */
      unsigned int mask;	/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64.  */
      if (TARGET_LP64_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_X32_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (opts->x_ix86_tune_string)
    {
      if (!strcmp (opts->x_ix86_tune_string, "generic")
	  || !strcmp (opts->x_ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (opts->x_ix86_tune_string, "native"))
	opts->x_ix86_tune_string = "generic";
      /* If this call is for setting the option attribute, allow the
	 generic that was previously set.  */
      else if (!main_args_p
	       && !strcmp (opts->x_ix86_tune_string, "generic"))
	;
      else if (!strncmp (opts->x_ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       opts->x_ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (opts->x_ix86_arch_string)
	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
      if (!opts->x_ix86_tune_string)
	{
	  opts->x_ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
	 or defaulted.  We need to use a sensible tune option.  */
      if (!strcmp (opts->x_ix86_tune_string, "generic")
	  || !strcmp (opts->x_ix86_tune_string, "x86-64")
	  || !strcmp (opts->x_ix86_tune_string, "i686"))
	opts->x_ix86_tune_string = "generic";
    }
  if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
      && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      opts->x_ix86_stringop_alg = no_stringop;
    }
  if (!opts->x_ix86_arch_string)
    opts->x_ix86_arch_string
      = TARGET_64BIT_P (opts->x_ix86_isa_flags)
	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (opts_set->x_ix86_pmode)
    {
      if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
	   && opts->x_ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && opts->x_ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
    }
  else
    opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
			 ? PMODE_DI : PMODE_SI;
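
  /* For example, plain -m64 (LP64) defaults the address mode to PMODE_DI
     here, while -m32 and -mx32 default to PMODE_SI; an inconsistent explicit
     combination, such as a DImode address mode in 32-bit mode, is rejected
     by the error above.  */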
  if (!opts_set->x_ix86_abi)
    opts->x_ix86_abi = DEFAULT_ABI;
  /* For targets using the ms ABI, enable ms-extensions if not explicitly
     turned off.  For the non-ms ABI we turn off this option.  */
  if (!opts_set->x_flag_ms_extensions)
    opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
  if (opts_set->x_ix86_cmodel)
    {
      switch (opts->x_ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (opts->x_flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (opts->x_flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      opts->x_ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && (TARGET_RDOS || TARGET_PECOFF))
	opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
      else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	opts->x_ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      opts->x_ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
      != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & PTA_AVX512F
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
	if (processor_alias_table[i].flags & PTA_AVX512ER
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
	if (processor_alias_table[i].flags & PTA_AVX512PF
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
	if (processor_alias_table[i].flags & PTA_AVX512CD
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
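
  /* As a worked example of the loop above: -march=btver2 matches a table
     entry whose flags include PTA_AVX, so OPTION_MASK_ISA_AVX is switched
     on here unless the user already passed -mno-avx, in which case the bit
     is present in opts->x_ix86_isa_flags_explicit and is left alone.  */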
  if (!strcmp (opts->x_ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (opts->x_ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   opts->x_ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    opts->x_ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (opts->x_ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   opts->x_ix86_tune_string, prefix, suffix, sw);
  set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_unwind_tables
	  = opts->x_flag_asynchronous_unwind_tables = 1;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = 0;
    }
  else
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer
	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (opts_set->x_ix86_regparm)
    {
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (opts->x_ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 opts->x_ix86_regparm, REGPARM_MAX);
	  opts->x_ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (opts->x_align_loops == 0)
    {
      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (opts->x_align_jumps == 0)
    {
      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (opts->x_align_functions == 0)
    opts->x_align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!opts_set->x_ix86_branch_cost)
    opts->x_ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT)
	      & ~opts->x_ix86_isa_flags_explicit);

      if (TARGET_RTD_P (opts->x_target_flags))
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;

      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
	opts->x_target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (opts->x_flag_omit_frame_pointer)
    opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
    opts->x_flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (opts->x_flag_finite_math_only)
    opts->x_target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (ix86_tune_features[X86_TUNE_ALWAYS_FANCY_MATH_387])
    opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387_P (opts->x_target_flags))
    opts->x_target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;

  /* Enable SSE prefetch.  */
  if (TARGET_SSE_P (opts->x_ix86_isa_flags)
      || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
    x86_prefetch_sse = true;

  /* Enable prefetch{,w} instructions for -m3dnow.  */
  if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;

  /* Enable popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
      || TARGET_ABM_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;

  /* Enable lzcnt instruction for -mabm.  */
  if (TARGET_ABM_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (opts_set->x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
		 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (opts->x_ix86_preferred_stack_boundary_arg < min
	  || opts->x_ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   opts->x_ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
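
  /* The argument is a power of two in bytes: e.g.
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 16 * 8 = 128 bits, i.e. a 16-byte stack boundary.  */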
  /* Set the default value for -mstackrealign.  */
  if (opts->x_ix86_force_align_arg_pointer == -1)
    opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (opts_set->x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg
	  < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg,
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
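
  /* Likewise, -mincoming-stack-boundary=2 on a 32-bit target yields
     (1 << 2) * BITS_PER_UNIT = 32 bits, the traditional 4-byte incoming
     alignment of the i386 ABI.  */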
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM_P (opts->x_target_flags)
      && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (opts_set->x_ix86_fpmath)
    {
      if (opts->x_ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      opts->x_ix86_fpmath = FPMATH_387;
	    }
	  else if ((opts->x_ix86_fpmath & FPMATH_387)
		   && !TARGET_80387_P (opts->x_target_flags))
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      opts->x_ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  /* For all chips supporting SSE2, -mfpmath=sse performs better than
     fpmath=387.  The second is however the default at many targets since the
     extra 80-bit precision of temporaries is considered to be part of ABI.
     Overwrite the default at least for -ffast-math.
     TODO: -mfpmath=both seems to produce the same performing code with bit
     smaller binaries.  It is however not clear if register allocation is
     ready for this setting.
     Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
     codegen.  We may switch to 387 with -ffast-math for size optimized
     functions.  */
  else if (fast_math_flags_set_p (&global_options)
	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
    opts->x_ix86_fpmath = FPMATH_SSE;
  else
    opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
  /* If the i387 is disabled, then do not return values in it. */
  if (!TARGET_80387_P (opts->x_target_flags))
    opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (opts_set->x_ix86_veclibabi_type)
    switch (opts->x_ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if (ix86_tune_features[X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
      && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !opts->x_optimize_size)
    opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE_P (opts->x_target_flags)
      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (opts->x_flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    opts->x_flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    ix86_gen_leave = gen_leave;

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
      ix86_gen_monitor = gen_sse3_monitor_di;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
      ix86_gen_monitor = gen_sse3_monitor_si;
    }
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
#endif
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
    {
      if (opts->x_flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      opts->x_flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (opts->x_flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      opts->x_flag_fentry = 1;
    }
  else if (opts->x_flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      opts->x_flag_fentry = 1;
#else
      opts->x_flag_fentry = 0;
#endif
    }
  /* When not optimizing for size, enable vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (!opts->x_optimize_size)
    {
      if (flag_expensive_optimizations
	  && !(opts_set->x_target_flags & MASK_VZEROUPPER))
	opts->x_target_flags |= MASK_VZEROUPPER;
      if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
	  && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
      if (!ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL]
	  && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
      /* Enable 128-bit AVX instruction generation
	 for the auto-vectorizer.  */
      if (TARGET_AVX128_OPTIMAL
	  && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
	opts->x_target_flags |= MASK_PREFER_AVX128;
    }
  if (opts->x_ix86_recip_name)
    {
      char *p = ASTRDUP (opts->x_ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  opts->x_recip_mask_explicit |= mask;
	  if (invert)
	    opts->x_recip_mask &= ~mask;
	  else
	    opts->x_recip_mask |= mask;
	}
    }
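
  /* For example, -mrecip=vec-div,!sqrt sets RECIP_MASK_VEC_DIV, clears
     RECIP_MASK_SQRT, and records both bits in opts->x_recip_mask_explicit
     so that the -mrecip defaults applied below leave them untouched.  */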
  if (TARGET_RECIP_P (opts->x_target_flags))
    opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
  else if (opts_set->x_target_flags & MASK_RECIP)
    opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);

  /* Default long double to 64-bit for Bionic.  */
  if (TARGET_HAS_BIONIC
      && !(opts_set->x_target_flags & MASK_LONG_DOUBLE_64))
    opts->x_target_flags |= MASK_LONG_DOUBLE_64;
  /* Save the initial options in case the user uses function-specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (opts);
  /* Handle stack protector.  */
  if (!opts_set->x_ix86_stack_protector_guard)
    opts->x_ix86_stack_protector_guard
      = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
  /* Handle -mmemcpy-strategy= and -mmemset-strategy=.  */
  if (opts->x_ix86_tune_memcpy_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
      ix86_parse_stringop_strategy_string (str, false);
      free (str);
    }

  if (opts->x_ix86_tune_memset_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
      ix86_parse_stringop_strategy_string (str, true);
      free (str);
    }
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  static struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
	1, PASS_POS_INSERT_AFTER
      };

  ix86_option_override_internal (true, &global_options, &global_options_set);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
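
/* Note that insert_vzeroupper_info above asks for PASS_POS_INSERT_AFTER
   relative to instance 1 of "reload", so the vzeroupper insertion pass
   only runs once register allocation has completed.  */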
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;
  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }
  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
	    : TARGET_64BIT ? (1 << 2)
	    : (1 << 1));

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }
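
  /* For instance, under the 64-bit MS ABI c_mask is (1 << 3), so a
     CALL_USED_REGISTERS entry whose initializer is greater than 1 is
     reduced to call-used here exactly when its bit 3 is set.  */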
  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
  /* If AVX512F is disabled, squash the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }
  /* If MPX is disabled, squash the registers.  */
  if (! TARGET_MPX)
    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr,
			     struct gcc_options *opts)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
  ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct gcc_options *opts,
				struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  opts->x_ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *opts,
				     struct gcc_options *opts_set,
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of option string */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* option number */
    int mask;			/* mask for yes/no options */
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
    IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
    IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
    IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),
    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
    IX86_ATTR_ISA ("adx",	OPT_madx),
    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, opts, opts_set,
						     enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }
  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}
      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;
      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}
      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (opts, opts_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    opts->x_target_flags |= mask;
	  else
	    opts->x_target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (opts, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
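
/* As an example of the parsing above, attribute((target("no-sse3,arch=core2")))
   arrives as the string "no-sse3,arch=core2": "no-sse3" matches the "sse3"
   ix86_opt_isa entry with opt_set_p cleared, while "arch=core2" stores
   "core2" in p_strings[IX86_FUNCTION_SPECIFIC_ARCH] for later handling.  */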
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args,
				  struct gcc_options *opts,
				  struct gcc_options *opts_set)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
					     opts_set, &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
      || opts->x_target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	opts->x_ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	opts->x_ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	       && TARGET_SSE_P (opts->x_ix86_isa_flags))
	{
	  opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false, opts, opts_set);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (opts->x_ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node (opts);

      opts->x_ix86_arch_string = orig_arch_string;
      opts->x_ix86_tune_string = orig_tune_string;
      opts_set->x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct gcc_options func_options;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node (&global_options);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  new_target = ix86_valid_target_attribute_tree (args, &func_options,
						 &global_options_set);

  new_optimize = build_optimization_node (&func_options);

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
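
/* For example, a caller compiled with -msse4 may inline a callee marked
   attribute((target("sse2"))), since the callee's ISA flags are a subset
   of the caller's, but the reverse combination is refused.  */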
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Invalidate ix86_previous_fndecl cache.  */
void
ix86_reset_previous_fndecl (void)
{
  ix86_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
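
/* For example, with -mcmodel=medium and the default -mlarge-data-threshold
   of 65536, a 100000-byte array counts as large data and is routed into
   the .ldata/.lbss sections by the hooks below.  */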
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
    flags |= SECTION_BSS;

  return flags;
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}

      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
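
/* With -mcmodel=medium, an object bigger than ix86_section_threshold is
   emitted via .largecomm: e.g. a 100000-byte object with 256-bit alignment
   produces ".largecomm name,100000,32".  */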
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
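/* Illustrative example (not from the original source): with -m32 and the
   default 387 return convention, a tail call in

       double g (void);
       int f (void) { return (int) g (); }

   is rejected by the code above: g's value lives in %st(0) (a STACK_REG)
   while f returns in %eax, so the required fp-stack adjustment would be
   skipped if the call were turned into a jump.  */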
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("regparm and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags, bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT (ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
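/* Illustrative example (not from the original source): for

       void __attribute__((fastcall)) f (int, int);

   ix86_get_callcvt returns IX86_CALLCVT_FASTCALL; per the check above, a
   regparm or sseregparm attribute on the same type would not add its bit,
   since those conventions are mutually exclusive with fastcall/thiscall.  */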
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = AX_REG; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
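/* Illustrative example (not from the original source): a stdcall function

       void __attribute__((stdcall)) f (int a, int b);

   pops its own 8 bytes of stack arguments, i.e. returns with "ret $8",
   so ix86_return_pops_args returns SIZE (8) for it; for plain cdecl it
   returns 0 and the caller pops.  */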
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx insn)
{
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
    {
      int i;

      extract_insn (insn);
      preprocess_constraints ();

      for (i = 0; i < recog_data.n_operands; i++)
	{
	  rtx op = recog_data.operand[i];
	  enum machine_mode mode = GET_MODE (op);
	  struct operand_alternative *op_alt;
	  int offset = 0;
	  bool win;
	  int j;

	  /* A unary operator may be accepted by the predicate, but it
	     is irrelevant for matching constraints.  */
	  if (UNARY_P (op))
	    op = XEXP (op, 0);

	  if (GET_CODE (op) == SUBREG)
	    {
	      if (REG_P (SUBREG_REG (op))
		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					      GET_MODE (SUBREG_REG (op)),
					      SUBREG_BYTE (op),
					      GET_MODE (op));
	      op = SUBREG_REG (op);
	    }

	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
	    continue;

	  op_alt = recog_op_alt[i];

	  /* Operand has no constraints, anything is OK.  */
	  win = !recog_data.n_alternatives;

	  for (j = 0; j < recog_data.n_alternatives; j++)
	    {
	      if (op_alt[j].anything_ok
		  || (op_alt[j].matches != -1
		      && operands_match_p
			  (recog_data.operand[i],
			   recog_data.operand[op_alt[j].matches]))
		  || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
		{
		  win = true;
		  break;
		}
	    }

	  if (!win)
	    return false;
	}
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
				     : HOST_WIDE_INT_C (0x7fff8000))
		     : (HOST_WIDE_INT_1 << 29);
}
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
    return true;

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI format used.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class);
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (!cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum
			&& !warnedavx
			&& cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else if ((size == 8 || size == 16) && !TARGET_SSE)
		  {
		    static bool warnedsse;

		    if (cum
			&& !warnedsse
			&& cum->warn_sse)
		      {
			warnedsse = true;
			warning (0, "SSE vector argument without SSE "
				 "enabled changes the ABI");
		      }
		  }
		return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
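/* Illustrative example (not from the original source): a GNU vector type

       typedef int v4si __attribute__((vector_size (16)));

   only has a vector TYPE_MODE when SSE is enabled; under -mno-sse the type
   system picks a non-vector mode, and the search above still recovers
   V4SImode so the ABI slot stays stable (with the one-time "SSE vector
   argument ... changes the ABI" warning).  */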
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
/* Examine the argument and return the number of registers required in
   each class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
:
6539 /* Construct container for the argument used by GCC interface. See
6540 FUNCTION_ARG for the detailed description. */
6543 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6544 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6545 const int *intreg
, int sse_regno
)
6547 /* The following variables hold the static issued_error state. */
6548 static bool issued_sse_arg_error
;
6549 static bool issued_sse_ret_error
;
6550 static bool issued_x87_ret_error
;
6552 enum machine_mode tmpmode
;
6554 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6555 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6559 int needed_sseregs
, needed_intregs
;
6560 rtx exp
[MAX_CLASSES
];
6563 n
= classify_argument (mode
, type
, regclass
, 0);
6566 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6569 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6572 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6573 some less clueful developer tries to use floating-point anyway. */
6574 if (needed_sseregs
&& !TARGET_SSE
)
6578 if (!issued_sse_ret_error
)
6580 error ("SSE register return with SSE disabled");
6581 issued_sse_ret_error
= true;
6584 else if (!issued_sse_arg_error
)
6586 error ("SSE register argument with SSE disabled");
6587 issued_sse_arg_error
= true;
6592 /* Likewise, error if the ABI requires us to return values in the
6593 x87 registers and the user specified -mno-80387. */
6594 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6595 for (i
= 0; i
< n
; i
++)
6596 if (regclass
[i
] == X86_64_X87_CLASS
6597 || regclass
[i
] == X86_64_X87UP_CLASS
6598 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6600 if (!issued_x87_ret_error
)
6602 error ("x87 register return with x87 disabled");
6603 issued_x87_ret_error
= true;
6608 /* First construct simple cases. Avoid SCmode, since we want to use
6609 single register to pass this type. */
6610 if (n
== 1 && mode
!= SCmode
)
6611 switch (regclass
[0])
6613 case X86_64_INTEGER_CLASS
:
6614 case X86_64_INTEGERSI_CLASS
:
6615 return gen_rtx_REG (mode
, intreg
[0]);
6616 case X86_64_SSE_CLASS
:
6617 case X86_64_SSESF_CLASS
:
6618 case X86_64_SSEDF_CLASS
:
6619 if (mode
!= BLKmode
)
6620 return gen_reg_or_parallel (mode
, orig_mode
,
6621 SSE_REGNO (sse_regno
));
6623 case X86_64_X87_CLASS
:
6624 case X86_64_COMPLEX_X87_CLASS
:
6625 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6626 case X86_64_NO_CLASS
:
6627 /* Zero sized array, struct or class. */
6633 && regclass
[0] == X86_64_SSE_CLASS
6634 && regclass
[1] == X86_64_SSEUP_CLASS
6636 return gen_reg_or_parallel (mode
, orig_mode
,
6637 SSE_REGNO (sse_regno
));
6639 && regclass
[0] == X86_64_SSE_CLASS
6640 && regclass
[1] == X86_64_SSEUP_CLASS
6641 && regclass
[2] == X86_64_SSEUP_CLASS
6642 && regclass
[3] == X86_64_SSEUP_CLASS
6644 return gen_reg_or_parallel (mode
, orig_mode
,
6645 SSE_REGNO (sse_regno
));
6647 && regclass
[0] == X86_64_X87_CLASS
6648 && regclass
[1] == X86_64_X87UP_CLASS
)
6649 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6652 && regclass
[0] == X86_64_INTEGER_CLASS
6653 && regclass
[1] == X86_64_INTEGER_CLASS
6654 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6655 && intreg
[0] + 1 == intreg
[1])
6656 return gen_rtx_REG (mode
, intreg
[0]);
6658 /* Otherwise figure out the entries of the PARALLEL. */
6659 for (i
= 0; i
< n
; i
++)
6663 switch (regclass
[i
])
6665 case X86_64_NO_CLASS
:
6667 case X86_64_INTEGER_CLASS
:
6668 case X86_64_INTEGERSI_CLASS
:
6669 /* Merge TImodes on aligned occasions here too. */
6670 if (i
* 8 + 8 > bytes
)
6672 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6673 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6677 /* We've requested 24 bytes we
6678 don't have mode for. Use DImode. */
6679 if (tmpmode
== BLKmode
)
6682 = gen_rtx_EXPR_LIST (VOIDmode
,
6683 gen_rtx_REG (tmpmode
, *intreg
),
6687 case X86_64_SSESF_CLASS
:
6689 = gen_rtx_EXPR_LIST (VOIDmode
,
6690 gen_rtx_REG (SFmode
,
6691 SSE_REGNO (sse_regno
)),
6695 case X86_64_SSEDF_CLASS
:
6697 = gen_rtx_EXPR_LIST (VOIDmode
,
6698 gen_rtx_REG (DFmode
,
6699 SSE_REGNO (sse_regno
)),
6703 case X86_64_SSE_CLASS
:
6711 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6721 && regclass
[1] == X86_64_SSEUP_CLASS
6722 && regclass
[2] == X86_64_SSEUP_CLASS
6723 && regclass
[3] == X86_64_SSEUP_CLASS
);
6731 = gen_rtx_EXPR_LIST (VOIDmode
,
6732 gen_rtx_REG (tmpmode
,
6733 SSE_REGNO (sse_regno
)),
6742 /* Empty aligned struct, union or class. */
6746 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6747 for (i
= 0; i
< nexps
; i
++)
6748 XVECEXP (ret
, 0, i
) = exp
[i
];
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}

static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
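/* Illustrative example (not from the original source): under the 64-bit
   MS ABI

       struct big { char buf[24]; };

   has size 24, which is not 1, 2, 4 or 8, so it is passed by reference
   (the caller materializes a copy and passes its address); an 8-byte
   struct, by contrast, is passed by value in a register.  */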
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
    case DX_REG:
      return true;
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_abi != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
           || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
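
/* Illustrative sketch (not part of GCC): for a 32-bit target the code
   above picks the return register roughly like this:

     int    f1 (void);   // %eax
     double f2 (void);   // %st(0) with the default -mfpmath=387
     __m128 f3 (void);   // %xmm0 (TImode/16-byte vectors)

   With SSE math enabled for a local function (or with the sseregparm
   attribute), an SFmode/DFmode value is instead returned in %xmm0.
   The register names assume the usual 32-bit SysV conventions.  */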
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
        case SDmode:
        case DDmode:
        case TDmode:
          regno = FIRST_SSE_REG;
          break;
        case XFmode:
        case XCmode:
          regno = FIRST_FLOAT_REG;
          break;
        case TCmode:
          return NULL;
        default:
          regno = AX_REG;
        }

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
                             X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                             x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
                      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 16:
          if (valtype != NULL_TREE
              && !VECTOR_INTEGER_TYPE_P (valtype)
              && !INTEGRAL_TYPE_P (valtype)
              && !VECTOR_FLOAT_TYPE_P (valtype))
            break;
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
        case 8:
        case 4:
          if (mode == SFmode || mode == DFmode)
            regno = FIRST_SSE_REG;
          break;
        default:
          break;
        }
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
                       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
                            int *punsignedp, const_tree fntype,
                            int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
                                        for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
          && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
              || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}
rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
        return false;

      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
        return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
        return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
        return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
       || VECTOR_FLOAT_TYPE_P (type))
      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
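
/* Illustrative sketch (not part of GCC): under the MS 64-bit ABI the
   predicate above makes aggregates of size 1, 2, 4 or 8 come back in
   %rax, while anything else is returned via a hidden pointer:

     struct s8  { int a, b; };       // size 8  -> returned in %rax
     struct s12 { int a, b, c; };    // size 12 -> returned in memory

   A 16-byte __m128-style value is the exception and comes back in
   %xmm0.  This is a reading of the checks above, not a normative ABI
   statement.  */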
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
        return return_in_memory_ms_64 (type, mode);
      else
        return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
        {
          if (mode == TImode
              || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
            {
              warnedsse = true;
              warning (0, "SSE vector return without SSE enabled "
                       "changes the ABI");
            }
        }

      if (!TARGET_MMX && !warnedmmx)
        {
          if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
            {
              warnedmmx = true;
              warning (0, "MMX vector return without MMX enabled "
                       "changes the ABI");
            }
        }
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
                          TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
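
/* Illustrative sketch (not part of GCC): the record built above
   corresponds to the user-visible x86-64 SysV va_list, roughly:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;       // offset into reg_save_area (GPRs)
       unsigned int fp_offset;       // offset into reg_save_area (SSE regs)
       void *overflow_arg_area;      // stack-passed arguments
       void *reg_save_area;          // register save area
     } __va_list_tag;

     typedef __va_list_tag va_list[1];  // the one-element array above

   Field names match the build_decl calls above; the C rendering itself
   is only an approximation of the tree that GCC constructs.  */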
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
        {
          t = ix86_build_builtin_va_list_abi (SYSV_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
        }
      else
        {
          t = ret;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
        }
      if (ix86_abi != MS_ABI)
        {
          t = ix86_build_builtin_va_list_abi (MS_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
        }
      else
        {
          t = ret;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
        }
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
                         plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
                      gen_rtx_REG (word_mode,
                                   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function, though all we
         actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
                                      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv va_arg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
                         plus_constant (Pmode, virtual_incoming_args_rtx,
                                        i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
                               true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg, seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = get_insns ();
          end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         The prologue of the function saves it right above the stack
         frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
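
/* Illustrative sketch (not part of GCC): for a function such as

     void f (int a, ...);

   on x86-64 SysV, the expansion above initializes the va_list roughly as

     gp_offset = 1 * 8;                      // one named GPR argument
     fp_offset = 0 * 16 + 8 * 6;             // no named SSE arguments
     overflow_arg_area = incoming arg ptr;   // plus any named stack words
     reg_save_area = (points at the save area set up by the prologue);

   assuming X86_64_REGPARM_MAX is 6 as on this target; the arithmetic
   mirrors the n_gpr * 8 and n_fpr * 16 + 8*X86_64_REGPARM_MAX
   assignments above.  */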
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
        {
          container = NULL;
          break;
        }
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, 0, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg,
                                       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
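
/* Illustrative sketch (not part of GCC): the gimple emitted above for a
   simple integer va_arg is morally equivalent to this C, following the
   usual description of the SysV x86-64 ABI:

     int
     next_int (va_list ap)
     {
       int v;
       if (ap->gp_offset <= 5 * 8)          // still fits in a saved GPR?
         {
           v = *(int *) ((char *) ap->reg_save_area + ap->gp_offset);
           ap->gp_offset += 8;
         }
       else                                 // otherwise: next stack slot
         {
           v = *(int *) ap->overflow_arg_area;
           ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
         }
       return v;
     }

   using the member names from ix86_build_builtin_va_list_abi; alignment
   handling and the register/stack split for aggregates are elided.  */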
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
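
/* Illustrative sketch (not part of GCC): the five strings above are
   decimal expansions of log10(2), ln(2), log2(e), log2(10) and pi,
   matching the x87 fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions.
   A quick standalone sanity check of the first one (to double
   precision only):

     #include <math.h>
     #include <assert.h>

     int
     main (void)
     {
       assert (fabs (log10 (2.0) - 0.3010299956639811952256464283594894482)
               < 1e-15);
       return 0;
     }

   The table itself is kept to XFmode (80-bit) precision by the
   real_convert call above.  */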
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
        if (TARGET_SSE2)
          return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
        if (TARGET_AVX2)
          return 2;
      default:
        break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (get_attr_mode (insn) == MODE_XI
          || get_attr_mode (insn) == MODE_V8DF
          || get_attr_mode (insn) == MODE_V16SF)
        return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
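
/* Illustrative sketch (not part of GCC): at the source level these two
   special constants typically look like

     __m128i zeros (void) { return _mm_setzero_si128 (); }  // pxor %xmm0, %xmm0
     __m128i ones (void)  { return _mm_set1_epi32 (-1); }   // pcmpeqd %xmm0, %xmm0

   Both sequences avoid a constant-pool load: xor-with-self and
   compare-equal-with-self are recognized register idioms.  The exact
   mnemonic (%v-prefixed, v-prefixed or plain) follows the insn mode
   switch above.  */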
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("%!ret", NULL);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
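
/* Illustrative sketch (not part of GCC): for regno == BX_REG the thunk
   emitted above is, in AT&T syntax,

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret

   i.e. it copies the return address (the address of the instruction
   after the call) into the PIC register and returns.  */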
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      /* We don't need a pic base, we're not producing pic.  */
      gcc_assert (!TARGET_MACHO);

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
         This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
         and we decided to emit the pic base above, we will still output a
         local label used for calculating the correction offset (even though
         the offset will be 0 in that case).  */
      if (label)
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
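
/* Illustrative sketch (not part of GCC): in the common ELF -fpic case
   the SET_GOT expansion above therefore produces

             call    __x86.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the add of the magic _GLOBAL_OFFSET_TABLE_ symbol is resolved
   by the assembler/linker relative to the instruction address, leaving
   %ebx pointing at the GOT.  */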
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool
          || cfun->has_nonlocal_label))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
         the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
          && (diff > 240 || (diff & 15) != 0)
          && !crtl->accesses_prior_frames)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset
            = frame->stack_pointer_offset - 128;
        }
    }
}
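
/* Illustrative sketch (not part of GCC): reading the offsets computed
   above top-down, the layout (higher addresses first) is approximately

     return address                     <- offset UNITS_PER_WORD from CFA
     [pushed static chain]
     [saved frame pointer]              <- hfp_save_offset
     saved GP registers                 <- reg_save_offset
     [aligned SSE register save area]   <- sse_reg_save_offset
     va_arg register save area
     local variables                    <- frame_pointer_offset
     outgoing argument area
     [red zone]                         <- stack_pointer_offset

   Bracketed entries are optional; the exact picture shifts for SEH and
   for stack realignment as handled above.  */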
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
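
/* Illustrative sketch (not part of GCC): the lengths returned above
   follow the x86 ModRM/SIB encoding rules, e.g.

     (%eax)       -> 0 extra bytes (base only)
     (%ebp)       -> 1 (EBP needs an explicit disp8 of 0)
     8(%eax)      -> 1 (disp8)
     (%esp)       -> 1 (SIB byte required)
     1024(%esp)   -> 5 (SIB byte + disp32)

   so the function counts displacement and SIB bytes only, not the
   opcode or ModRM byte itself.  */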
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
                            m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
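
/* As an illustration of the bookkeeping above (not code this function
   emits verbatim): releasing a 40-byte frame on x86-64 while the CFA is
   still SP-based produces an insn equivalent to

	(set (reg sp) (plus (reg sp) (const_int 40)))

   and, when SET_CFA is true, a matching REG_CFA_ADJUST_CFA note, so the
   unwinder's notion of the CFA moves together with the stack pointer.
   The value 40 is only an example.  */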
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   longer encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
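
/* Illustration of the selection above (assuming the usual register
   conventions): a plain 32-bit function with at most two regparm
   arguments and neither fastcall nor thiscall gets ECX, while a nested
   function or one with emitted tail calls falls back to EDI; 64-bit
   code uses R10, or R13 in the nested/tail-call case.  */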
/* Return the minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified on the command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't the check for a sibcall, and
     the estimated stack alignment is 128 bits.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* The stack at the entry of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
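
/* A worked example of the clamping above (values illustrative): with a
   128-bit default boundary, a 32-bit function carrying the
   force_align_arg_pointer attribute first drops to MIN_STACK_BOUNDARY,
   is raised back to crtl->parm_stack_boundary if its parameters demand
   more, and, if it is file-scope main, is additionally capped at
   MAIN_STACK_BOUNDARY.  */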
/* Update the incoming stack boundary and the estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16-byte stack alignment for the register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}
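
/* A minimal usage sketch of the pair above, as in the stack-probing
   routines that follow:

	struct scratch_reg sr;
	get_scratch_register_on_entry (&sr);
	... emit insns using sr.reg as a temporary ...
	release_scratch_register_on_entry (&sr);  */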
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
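
/* For a small constant SIZE, say two intervals with the usual 4096-byte
   PROBE_INTERVAL on x86-64 (dope = 32), the unrolled case above amounts
   to something like (illustrative only):

	subq	$8224, %rsp	; 2*PROBE_INTERVAL + dope, first probe
	orq	$0, (%rsp)
	subq	$4096, %rsp
	orq	$0, (%rsp)
	addq	$4128, %rsp	; give back PROBE_INTERVAL + dope

   leaving the stack pointer exactly SIZE bytes lower than on entry.  */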
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
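
/* Assuming the usual 4096-byte probe interval, the loop printed above
   looks roughly like this on x86-64, where %r11 stands for whatever
   scratch register holds LAST_ADDR (labels abbreviated):

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)	; touch the newly exposed page
	jmp	.LPSRL0
   .LPSRE0:  */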
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
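
/* E.g. with FIRST == 0, SIZE == 3 * PROBE_INTERVAL and a 4096-byte
   interval, the unrolled case above emits probes at sp-4096, sp-8192
   and sp-12288 without moving the stack pointer; the loop form is only
   used beyond 7 intervals.  */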
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
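
/* Printed on x86-64 with a 4096-byte interval this becomes roughly
   (illustrative; %r11 is the TEST_ADDR scratch, %rax the LAST_ADDR
   operand):

   .LPSRL0:
	cmpq	%rax, %r11
	je	.LPSRE0
	subq	$4096, %r11
	orq	$0, (%rsp,%r11)	; probe at SP + TEST_ADDR
	jmp	.LPSRL0
   .LPSRE0:  */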
/* Finalize the stack_realign_needed flag, which will guide the
   prologue/epilogue to be generated in the correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (crtl->is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested to offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the profiling before
	 prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
	 on SEH target.  */
      if (!int_registers_saved
	  && TARGET_SEH
	  && !frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
	 However, avoid doing this if we have to probe the stack; at least
	 on x86_64 the stack probe can turn into a call that clobbers a
	 red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
	frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  /* We don't use pic-register for pe-coff target.  */
  if (pic_offset_table_rtx
      && !TARGET_PECOFF
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack-realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
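
/* For orientation: in the simplest 64-bit case (frame pointer needed,
   no realignment, DRAP or stack probing), the insns emitted above boil
   down to the classic sequence

	pushq	%rbp
	movq	%rsp, %rbp
	subq	$N, %rsp	; N = size of the local frame

   with the CFA notes attached so the unwinder can track the frame.  */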
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
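
/* LEAVE is architecturally equivalent to "movq %rbp, %rsp; popq %rbp"
   (mov %ebp, %esp / pop %ebp in 32-bit mode), which explains the state
   flips above: SP becomes valid again one word closer to the CFA than
   the FP was, and FP ceases to be valid.  */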
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (crtl->sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid
	  || (TARGET_SEH
	      && (m->fs.sp_offset - frame.reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do an explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
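
/* Again for orientation, the common frame-pointer epilogue produced by
   the logic above is simply

	leave			; or movq %rbp, %rsp; popq %rbp
	ret			; or "ret $N" when pops_args < 64K

   with any register pops or moves emitted just before it.  */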
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only, instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (is_thiscall)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return DX_REG;
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
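
/* Illustrative summary (derived from the logic above, not a separate
   table in GCC): the scratch register chosen per calling convention.

     64-bit                       %r11
     fastcall                     %eax  (%ecx/%edx carry arguments)
     thiscall                     %edx, or %eax with a static chain
     regparm < 3, no chain        %ecx
     regparm < 2, static chain    %edx
     regparm == 2, static chain   unsupported (sorry () above)
     regparm == 3                 unsupported (sorry () above)  */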
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */
void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_int_reg_note (jump_insn, REG_BR_PROB,
		    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	  && !TARGET_PECOFF)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
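
/* A minimal sketch (not GCC code) of the control flow the expander
   above emits, written as C pseudo-logic.  `stack_limit' stands for
   the TCB field read through the UNSPEC_STACK_CHECK memory, and
   `NEED' and `ARGS' for `allocate' and `args_size'; all three names
   are hypothetical.  */
#if 0
  if (sp - NEED >= stack_limit)    /* likely branch, see REG_BR_PROB */
    goto enough_stack;
  __morestack (NEED, ARGS);        /* switches to a new stack segment */
  /* __morestack returns here, on the new stack.  */
 enough_stack:
  /* regular prologue continues */
#endif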
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if op is suitable SUBREG RTX for address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return 0;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
	  if (addr == NULL_RTX)
	    return 0;

	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else if (CONST_INT_P (addr))
    {
      if (!x86_64_immediate_operand (addr, VOIDmode))
	return 0;

      /* Constant addresses are sign extended to 64bit, we have to
	 prevent addresses from 0x80000000 to 0xffffffff in x32 mode.  */
      if (TARGET_X32
	  && val_signbit_known_set_p (SImode, INTVAL (addr)))
	return 0;

      disp = addr;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
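
/* Illustrative sketch (not part of GCC): what the decomposition above
   produces for a typical scaled-index address.  The RTX built here
   corresponds to disp(base,index,4), e.g. the access `base[i]' with
   4-byte elements plus a constant offset of 16.  */
#if 0
{
  struct ix86_address parts;
  rtx base = gen_rtx_REG (SImode, BX_REG);
  rtx index = gen_rtx_REG (SImode, SI_REG);
  /* (plus (plus (mult index 4) base) (const_int 16))  */
  rtx addr = gen_rtx_PLUS (SImode,
			   gen_rtx_PLUS (SImode,
					 gen_rtx_MULT (SImode, index,
						       GEN_INT (4)),
					 base),
			   GEN_INT (16));
  int ok = ix86_decompose_address (addr, &parts);
  /* ok == 1; parts.base == base, parts.index == index,
     parts.scale == 4, parts.disp == (const_int 16).  */
}
#endif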
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
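
/* Example of why TLS symbols are rejected above: for `__thread int t;',
   the address of `t' is computed relative to the thread pointer at run
   time, so `&t' is not a link-time constant and may not be placed in
   the constant pool; it is instead expanded through
   legitimize_tls_address below.  */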
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol needs always to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0)
		  || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Function-symbols need to be resolved only for
		 the large-model.
		 For the small-model we don't need to resolve anything
		 here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0))
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	      /* Non-external symbols don't need to be resolved for
		 large, and medium-model.  */
	      if ((ix86_cmodel == CM_LARGE_PIC
		   || ix86_cmodel == CM_MEDIUM_PIC)
		  && !SYMBOL_REF_EXTERNAL_P (op0))
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && SYMBOL_REF_LOCAL_P (op0)
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies a 32bit relocation as well, we don't
	 produce it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI specifies 32bit relocations as well,
	     we don't produce them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo (int i)
	       {
		 return *(&a + i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in case
	     the output register differs from the input.  While this
	     can be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to
	     be the easier and correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
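
/* Illustrative examples (not an exhaustive list) of how the checks
   above classify concrete 32-bit addresses, in AT&T syntax:

     8(%ebp)              valid: base + displacement
     (%ebx,%esi,4)        valid: base + index*scale
     (%ebx,%esi,3)        invalid: scale must be 1, 2, 4 or 8
     (,%esp,2)            invalid: %esp cannot serve as index
     sym@GOTOFF(%ebx)     valid only when flag_pic is set  */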
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address in this load.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT && !TARGET_PECOFF
	   && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table.  So we use the address
	 directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      enum machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);
		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
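
/* A sketch (not emitted verbatim by the code above) of the two 32-bit
   PIC access patterns produced here, assuming %ebx holds the PIC base:

     global data:  movl sym@GOT(%ebx), %reg     load &sym from the GOT
		   movl (%reg), ...
     local data:   leal sym@GOTOFF(%ebx), %reg  PIC-base-relative offset

   In RTL the local case is (plus pic_offset_table_rtx
   (const (unspec [sym] UNSPEC_GOTOFF))), as constructed above.  */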
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns, eqv;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
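
/* Approximate x86-64 GNU-TLS sequences corresponding to the four
   models handled above (see the psABI and i386.md for the exact
   patterns; this is only an illustrative sketch):

     global-dynamic:  leaq sym@tlsgd(%rip),%rdi; call __tls_get_addr
     local-dynamic:   leaq sym@tlsld(%rip),%rdi; call __tls_get_addr
		      leaq sym@dtpoff(%rax),%reg
     initial-exec:    movq sym@gottpoff(%rip),%reg; movq %fs:(%reg),...
     local-exec:      movq %fs:sym@tpoff,...  */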
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
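
/* Illustrative examples of the names built above for a decl `foo'
   (derived from the prefix selection logic; the leading `*' only
   suppresses the user label prefix):

     dllimport, no user prefix:    __imp_foo
     dllimport, `_' user prefix:   __imp__foo
     refptr, no user prefix:       .refptr.foo
     refptr, `_' user prefix:      refptr.foo  */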
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }
  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
13626 /* Print an integer constant expression in assembler syntax. Addition
13627 and subtraction are the only arithmetic that may appear in these
13628 expressions. FILE is the stdio stream to write to, X is the rtx, and
13629 CODE is the operand print code from the output string. */
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
          && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           TARGET_PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
        {
          bool f = i386_asm_output_addr_const_extra (file, x);
          gcc_assert (f);
          break;
        }

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
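/* Worked example (illustrative only, not exercised by the compiler
   itself): under -fpic on ia32, passing the RTX

     (const:SI (unspec:SI [(symbol_ref:SI ("foo"))] UNSPEC_GOTOFF))

   through output_pic_addr_const prints the symbol first and the
   relocation suffix afterwards, yielding "foo@GOTOFF"; on x86-64 a
   UNSPEC_GOTPCREL wrapper would instead yield "foo@GOTPCREL(%rip)"
   in AT&T syntax or "foo@GOTPCREL[rip]" in Intel syntax.  */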
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
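/* Example of the emitted directive (assuming ASM_LONG is ".long", as
   on typical ELF targets): for a 4-byte request on symbol "x" the
   function above produces ".long x@dtpoff", and for an 8-byte request
   ".long x@dtpoff, 0", zero-padding the upper half of the slot.  */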
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
            && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
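/* Illustrative sketch: a 32-bit local-exec TLS access such as

     movl %gs:foo@ntpoff(,%eax,4), %edx

   reaches this function as an address whose %gs-segment displacement
   is (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF)); the code
   above rebuilds the plain (plus (mult (reg eax) (const_int 4))
   (symbol_ref "foo")) form so debug output can refer to "foo".  */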
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }

      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == UNSPEC
          && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
              || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
          && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
        {
          x = XVECEXP (XEXP (x, 0), 0, 0);
          if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
            {
              x = simplify_gen_subreg (GET_MODE (orig_x), x,
                                       GET_MODE (x), 0);
              if (x == NULL_RTX)
                return orig_x;
            }
          return x;
        }

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
        return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
         and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
          || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
              && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
                                                 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
           leal (%ebx, %ecx, 4), %ecx
           ...
           movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                                     pic_offset_table_rtx),
                               result);
      else
        return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))
        return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
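/* Usage sketch (hypothetical caller): alias analysis may hand
   ix86_find_base_term the address

     (const:DI (unspec:DI [(symbol_ref:DI ("bar"))] UNSPEC_GOTPCREL))

   and gets (symbol_ref "bar") back, so two GOT-relative loads of
   distinct symbols can be told apart by their base terms.  */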
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
                    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
        {
        case CCAmode:
          suffix = "a";
          break;
        case CCCmode:
          suffix = "c";
          break;
        case CCOmode:
          suffix = "o";
          break;
        case CCSmode:
          suffix = "s";
          break;
        default:
          suffix = "e";
        }
      break;
    case NE:
      switch (mode)
        {
        case CCAmode:
          suffix = "na";
          break;
        case CCCmode:
          suffix = "nc";
          break;
        case CCOmode:
          suffix = "no";
          break;
        case CCSmode:
          suffix = "ns";
          break;
        default:
          suffix = "ne";
        }
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else
        gcc_unreachable ();
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      if (mode == CCmode)
        suffix = "b";
      else if (mode == CCCmode)
        suffix = "c";
      else
        gcc_unreachable ();
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      if (mode == CCmode)
        suffix = fp ? "nb" : "ae";
      else if (mode == CCCmode)
        suffix = "nc";
      else
        gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
        suffix = "be";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
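/* Example (illustrative): for (gt (reg:CCGC flags) (const_int 0))
   with reverse and fp both false this prints "g", so a "set%C0"
   template expands to "setg"; with reverse true the GT is first
   turned into LE by reverse_condition and "setle" results.  */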
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.  */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg = NULL;
  unsigned int regno;
  bool duplicated = code == 'd' && TARGET_AVX;

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  regno = true_regnum (x);
  gcc_assert (regno != ARG_POINTER_REGNUM
              && regno != FRAME_POINTER_REGNUM
              && regno != FLAGS_REG
              && regno != FPSR_REG
              && regno != FPCR_REG);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else if (code == 'g')
    code = 64;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming
     convention from the normal registers: "r%d[bwd]".  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          putc ('b', file);
          break;
        case 2:
          putc ('w', file);
          break;
        case 4:
          putc ('d', file);
          break;
        case 8:
          /* no suffix */
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }

  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x) && ! ANY_BND_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('y', file);
          fputs (hi_reg_name[regno] + 1, file);
          return;
        }
      break;
    case 64:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('z', file);
          fputs (hi_reg_name[REGNO (x)] + 1, file);
          return;
        }
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}
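/* Example (illustrative): if x is the DImode hard register rax,
   code 'k' prints "%eax", code 'b' prints "%al" and code 'h' prints
   "%ah" in AT&T dialect; for r8 the AMD-style names apply, so code
   'k' prints "%r8d" via the REX branch above instead.  */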
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
   ! -- print MPX prefix for jxx/call/ret instructions if required.
 */
static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (!REG_P (x))
                {
                  putc ('[', file);
                  ix86_print_operand (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          ix86_print_operand (file, x, 0);
          return;

        case 'E':
          /* Wrap address in an UNSPEC to declare special handling.  */
          if (TARGET_64BIT)
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

          output_address (x);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT != ASM_ATT)
            return;

          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
              putc ('w', file);
              break;
            case 4:
              putc ('l', file);
              break;
            case 8:
              putc ('q', file);
              break;
            default:
              output_operand_lossage
                ("invalid operand size for operand code 'O'");
              return;
            }

          putc ('.', file);
#endif
          return;

        case 'z':
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              /* Opcodes don't get size suffixes if using Intel opcodes.  */
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 1:
                  putc ('b', file);
                  return;
                case 2:
                  putc ('w', file);
                  return;
                case 4:
                  putc ('l', file);
                  return;
                case 8:
                  putc ('q', file);
                  return;
                default:
                  output_operand_lossage
                    ("invalid operand size for operand code 'z'");
                  return;
                }
            }

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            warning
              (0, "non-integer operand used with operand code 'z'");
          /* FALLTHRU */

        case 'Z':
          /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 2:
#ifdef HAVE_AS_IX86_FILDS
                  putc ('s', file);
#endif
                  return;
                case 4:
                  putc ('l', file);
                  return;
                case 8:
#ifdef HAVE_AS_IX86_FILDQ
                  putc ('q', file);
#else
                  fputs ("ll", file);
#endif
                  return;
                default:
                  break;
                }
            }
          else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            {
              /* 387 opcodes don't get size suffixes
                 if the operands are registers.  */
              if (STACK_REG_P (x))
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 4:
                  putc ('s', file);
                  return;
                case 8:
                  putc ('l', file);
                  return;
                case 12:
                case 16:
                  putc ('t', file);
                  return;
                default:
                  break;
                }
            }
          else
            {
              output_operand_lossage
                ("invalid operand type used with operand code 'Z'");
              return;
            }

          output_operand_lossage
            ("invalid operand size for operand code 'Z'");
          return;

        case 'd':
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 't':
        case 'g':
        case 'y':
        case 'x':
        case 'X':
        case 'P':
        case 'p':
          break;

        case 's':
          if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              ix86_print_operand (file, x, 0);
              fputs (", ", file);
            }
          return;

        case 'Y':
          switch (GET_CODE (x))
            {
            case NE:
              fputs ("neq", file);
              break;
            case EQ:
              fputs ("eq", file);
              break;
            case GE:
            case GEU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
              break;
            case GT:
            case GTU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
              break;
            case LE:
            case LEU:
              fputs ("le", file);
              break;
            case LT:
            case LTU:
              fputs ("lt", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            case UNEQ:
              fputs ("ueq", file);
              break;
            case UNGE:
              fputs ("nlt", file);
              break;
            case UNGT:
              fputs ("nle", file);
              break;
            case UNLE:
              fputs ("ule", file);
              break;
            case UNLT:
              fputs ("ult", file);
              break;
            case LTGT:
              fputs ("une", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
              return;
            }
          return;

        case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves.  */
          switch (GET_CODE (x))
            {
            case UNEQ:
              if (TARGET_AVX)
                {
                  fputs ("eq_us", file);
                  break;
                }
              /* FALLTHRU */
            case EQ:
              fputs ("eq", file);
              break;
            case UNLT:
              if (TARGET_AVX)
                {
                  fputs ("nge", file);
                  break;
                }
              /* FALLTHRU */
            case LT:
              fputs ("lt", file);
              break;
            case UNLE:
              if (TARGET_AVX)
                {
                  fputs ("ngt", file);
                  break;
                }
              /* FALLTHRU */
            case LE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
              if (TARGET_AVX)
                {
                  fputs ("neq_oq", file);
                  break;
                }
              /* FALLTHRU */
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
              if (TARGET_AVX)
                {
                  fputs ("ge", file);
                  break;
                }
              /* FALLTHRU */
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
              if (TARGET_AVX)
                {
                  fputs ("gt", file);
                  break;
                }
              /* FALLTHRU */
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'D'");
              return;
            }
          return;

        case 'F':
        case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          /* FALLTHRU */

        case 'C':
        case 'c':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code '%c'", code);
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              code == 'c' || code == 'f',
                              code == 'F' || code == 'f',
                              file);
          return;

        case 'H':
          if (!offsettable_memref_p (x))
            {
              output_operand_lossage ("operand is not an offsettable memory "
                                      "reference, invalid operand code 'H'");
              return;
            }
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          /* Output 'qword ptr' for intel assembler dialect.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            code = 'q';
          break;

        case 'K':
          gcc_assert (CONST_INT_P (x));

          if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
            fputs ("xacquire ", file);
#else
            fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
          else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
            fputs ("xrelease ", file);
#else
            fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
          /* We do not want to print value of the operand.  */
          return;

        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          {
            const char *name = get_some_local_dynamic_name ();
            if (name == NULL)
              output_operand_lossage ("'%%&' used without any "
                                      "local dynamic TLS references");
            else
              assemble_name (file, name);
            return;
          }

        case '+':
          {
            rtx x;

            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = XINT (x, 0);

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    bool taken = pred_val > REG_BR_PROB_BASE / 2;
                    bool cputaken
                      = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }

        case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
          putc (';', file);
#endif
          return;

        case '@':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('%', file);

          /* The kernel uses a different segment register for performance
             reasons; a system call would not have to trash the userspace
             segment register, which would be expensive.  */
          if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
            fputs ("fs", file);
          else
            fputs ("gs", file);
          return;

        case '~':
          putc (TARGET_AVX2 ? 'i' : 'f', file);
          return;

        case '^':
          if (TARGET_64BIT && Pmode != word_mode)
            fputs ("addr32 ", file);
          return;

        case '!':
          if (ix86_bnd_prefixed_insn_p (NULL_RTX))
            fputs ("bnd ", file);
          return;

        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
        {
          const char *size;

          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
            case 16:
              if (GET_MODE (x) == XFmode)
                size = "TBYTE";
              else
                size = "XMMWORD";
              break;
            case 32: size = "YMMWORD"; break;
            case 64: size = "ZMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w', 'k',
             'q' and 'x').  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";
          else if (code == 'q')
            size = "QWORD";
          else if (code == 'x')
            size = "XMMWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
        fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
                 (unsigned long long) (int) l);
      else
        fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P' && code != 'p')
        {
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
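/* Worked example (illustrative): with operands[1] being
   (eq (reg:CC flags) (const_int 0)), the AT&T template
   "cmov%C1\t%2, %0" enters this function with code 'C' and prints
   "e", giving "cmove"; code 'c' prints the reversed condition and
   would give "cmovne" for the same operand.  */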
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
          || code == ';' || code == '~' || code == '^' || code == '!');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
      gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
      if (parts.base != NULL_RTX)
        {
          parts.index = parts.base;
          parts.scale = 1;
        }
      parts.base = XVECEXP (addr, 0, 0);
      addr = XVECEXP (addr, 0, 0);
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      addr = XVECEXP (addr, 0, 0);
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS
          && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
        symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
          || (GET_CODE (symbol) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (symbol) == 0))
        base = pc_rtx;
    }
  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
        {
#ifdef ENABLE_CHECKING
          gcc_assert (TARGET_64BIT);
          switch (GET_CODE (addr))
            {
            case SUBREG:
              gcc_assert (GET_MODE (addr) == SImode);
              gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
              break;
            case ZERO_EXTEND:
            case AND:
              gcc_assert (GET_MODE (addr) == DImode);
              break;
            default:
              gcc_unreachable ();
            }
#endif
          gcc_assert (!code);
          code = 'k';
        }
      else if (code == 0
               && TARGET_X32
               && disp
               && CONST_INT_P (disp)
               && INTVAL (disp) < -16*1024*1024)
        {
          /* X32 runs in 64-bit mode, where displacement, DISP, in
             address DISP(%r64), is encoded as 32-bit immediate sign-
             extended from 32-bit to 64-bit.  For -0x40000300(%r64),
             address is %r64 + 0xffffffffbffffd00.  When %r64 <
             0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
             which is invalid for x32.  The correct address is %r64
             - 0x40000300 == 0xf7ffdd64.  To properly encode
             -0x40000300(%r64) for x32, we zero-extend negative
             displacement by forcing addr32 prefix which truncates
             0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
             zero-extend all negative displacements, including -1(%rsp).
             However, for small negative displacements, sign-extension
             won't cause overflow.  We only zero-extend negative
             displacements if they < -16*1024*1024, which is also used
             to check legitimate address displacements for PIC.  */
          code = 'k';
        }

      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, code, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (CONST_INT_P (disp))
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, code, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

          if (index)
            {
              putc ('+', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
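/* Example (illustrative): base %ebp, index %ecx, scale 4 and
   displacement -4 print as "-4(%ebp,%ecx,4)" in AT&T dialect and as
   "[ebp+ecx*4-4]" in Intel dialect; a TLS-free symbol with no base
   or index on x86-64 gets base = pc_rtx above and prints as
   "sym(%rip)".  */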
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@tpoff", file);
      else
        fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs (ASSEMBLER_DIALECT == ASM_ATT ?
               "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
        fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif
    case UNSPEC_STACK_CHECK:
      {
        int offset;

        gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
        offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
        gcc_unreachable ();
#endif

        fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
                   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (MEM_P (op))
        {
          lo_half[num] = adjust_address (op, half_mode, 0);
          hi_half[num] = adjust_address (op, half_mode, byte);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), byte);
        }
    }
}
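/* Example (illustrative): splitting the DImode pseudo (reg:DI 100)
   yields lo_half = (subreg:SI (reg:DI 100) 0) and hi_half =
   (subreg:SI (reg:DI 100) 4), while a DImode MEM is split with
   adjust_address at offsets 0 and 4 so volatile accesses remain
   valid.  */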
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
               || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
        {
          strcpy (buf, ssep);
          if (GET_MODE (operands[0]) == SFmode)
            strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
          else
            strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
        }
      else
        {
          strcpy (buf, ssep + 1);
          if (GET_MODE (operands[0]) == SFmode)
            strcat (buf, "ss\t{%2, %0|%0, %2}");
          else
            strcat (buf, "sd\t{%2, %0|%0, %2}");
        }
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
        {
          p = "r%Z1\t%1";
          break;
        }

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
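/* Example (illustrative): for (set (reg:SF st0) (plus:SF (reg:SF st0)
   (reg:SF st1))) this returns "fadd\t{%y2, %0|%0, %y2}", while the
   same addition on SSE registers under AVX produces
   "vaddss\t{%2, %1, %0|%0, %1, %2}".  */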
/* Check if a 256bit AVX register is referenced inside of EXP.  */

static int
ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
{
  rtx exp = *pexp;

  if (GET_CODE (exp) == SUBREG)
    exp = SUBREG_REG (exp);

  if (REG_P (exp)
      && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
    return 1;

  return 0;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
         no 256bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
           link;
           link = XEXP (link, 1))
        {
          if (GET_CODE (XEXP (link, 0)) == USE)
            {
              rtx arg = XEXP (XEXP (link, 0), 0);

              if (ix86_check_avx256_register (&arg, NULL))
                return AVX_U128_ANY;
            }
        }

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
     changes state only when a 256bit register is written to, but we need
     to prevent the compiler from moving optimal insertion point above
     eventual read from 256bit register.  */
  if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that function
     has no requirements on the control word and makes no changes in the
     bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Return mode that entity must be switched into
   prior to the execution of insn.  */

static int
ix86_mode_needed (int entity, rtx insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}
/* Check if a 256bit AVX register is referenced in stores.  */

static void
ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
{
  if (ix86_check_avx256_register (&dest, NULL))
    {
      bool *used = (bool *) data;
      *used = true;
    }
}
/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_reg256_found = false;
      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
      if (!avx_reg256_found)
        return AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit registers, the mode was already changed
     to DIRTY from MODE_NEEDED.  */
  return mode;
}
/* Return the mode that an insn results in.  */

static int
ix86_mode_after (int entity, int mode, rtx insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return mode;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx256_register (&incoming, NULL))
        return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are
     256bit modes used in the function return register.  */
  if (reg && ix86_check_avx256_register (&reg, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_insn_for_size_p ())
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
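/* The two control-word bits selected by mask 0x0c00 choose the x87
   rounding mode: 0x0000 rounds to nearest, 0x0400 toward -inf
   (floor), 0x0800 toward +inf (ceil) and 0x0c00 toward zero (trunc);
   bit 0x0020 masks the precision exception.  The I387_CW_FLOOR
   sequence above therefore clears both bits first (AND with ~0x0c00)
   and then ORs in 0x0400.  */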
/* Emit vzeroupper.  */

static void
ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  int i;

  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
      return;
  if (TARGET_64BIT)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
        return;

  emit_insn (gen_avx_vzeroupper ());
}

/* Generate one or more insns to set ENTITY to MODE.  */

static void
ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
{
  switch (entity)
    {
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
        ix86_avx_emit_vzeroupper (regs_live);
      break;
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      if (mode != I387_CW_ANY
          && mode != I387_CW_UNINITIALIZED)
        emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%Z0\t%0", operands);
      else
        output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
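/* Example (illustrative): truncating an x87 SFmode value to SImode
   without fisttp emits, roughly,

        fldcw   %3              # switch to round-toward-zero
        fistpl  %0              # convert, store and pop
        fldcw   %2              # restore the saved control word

   whereas SSE3's fisttp makes the control-word save/restore pair
   unnecessary.  */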
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        {
          if (unordered_p)
            return "%vucomiss\t{%1, %0|%0, %1}";
          else
            return "%vcomiss\t{%1, %0|%0, %1}";
        }
      else
        {
          if (unordered_p)
            return "%vucomisd\t{%1, %0|%0, %1}";
          else
            return "%vcomisd\t{%1, %0|%0, %1}";
        }
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */
      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
      static const char * const alt[16] =
      {
        "fcom%Z2\t%y2\n\tfnstsw\t%0",
        "fcomp%Z2\t%y2\n\tfnstsw\t%0",
        "fucom%Z2\t%y2\n\tfnstsw\t%0",
        "fucomp%Z2\t%y2\n\tfnstsw\t%0",

        "ficom%Z2\t%y2\n\tfnstsw\t%0",
        "ficomp%Z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
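/* Example (illustrative): eflags_p = 1, a floating-point cmp_op1,
   unordered_p = true and a dying stack top give
   mask = 8 + 0 + 2 + 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */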
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
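/* Example (illustrative, assuming LPREFIX is ".L" as on ELF):
   value = 3, rel = 1 produces ".long .L3-.L1" when label
   differences are usable, and ".long .L3@GOTOFF" on ia32 when the
   assembler supports @GOTOFF in data.  */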
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
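/* Example (illustrative): clearing the QImode register %al after
   reload widens the destination to %eax and emits the two-byte
   "xorl %eax, %eax" (with an attached flags clobber) rather than the
   longer "movb $0, %al", unless tuning explicitly prefers mov.  */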
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      rtx tmp;

      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
          op1 = convert_to_mode (mode, op1, 1);
        }
      else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
        op1 = tmp;
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
        tmp = legitimize_tls_address (symbol, model, true);
      else
        tmp = legitimize_pe_coff_symbol (symbol, true);

      if (tmp)
        {
          tmp = force_operand (tmp, NULL);
          tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (tmp == op0)
            return;
          op1 = convert_to_mode (mode, tmp, 1);
        }
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && REG_P (op0))
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              if (MACHOPIC_PURE)
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);
            }
          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
              emit_insn (insn);
              return;
            }
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            {
              rtx temp = op0;
              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);
              if (temp == op0)
                return;
              op1 = temp;
            }
          /* dynamic-no-pic */
#endif
        }
      else
        {
          if (MEM_P (op0))
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
              if (op0 == op1)
                return;
              op1 = convert_to_mode (mode, op1, 1);
            }
        }
    }
  else
    {
      if (MEM_P (op0)
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && MEM_P (op1))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && FLOAT_MODE_P (mode)
          && GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */
          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
              emit_move_insn (op0, temp);
              return;
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
        op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      load_unaligned = gen_avx_loaddquv32qi;
      store_unaligned = gen_avx_storedquv32qi;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      load_unaligned = gen_avx_loadups256;
      store_unaligned = gen_avx_storeups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      load_unaligned = gen_avx_loadupd256;
      store_unaligned = gen_avx_storeupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
        {
          rtx r = gen_reg_rtx (mode);
          m = adjust_address (op1, mode, 0);
          emit_move_insn (r, m);
          m = adjust_address (op1, mode, 16);
          r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
          emit_move_insn (op0, r);
        }
      /* Normal *mov<mode>_internal pattern will handle
         unaligned loads just fine if misaligned_operand
         is true, and without the UNSPEC it can be combined
         with arithmetic instructions.  */
      else if (misaligned_operand (op1, GET_MODE (op1)))
        emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
      else
        emit_insn (load_unaligned (op0, op1));
    }
  else if (MEM_P (op0))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
        {
          m = adjust_address (op0, mode, 0);
          emit_insn (extract (m, op1, const0_rtx));
          m = adjust_address (op0, mode, 16);
          emit_insn (extract (m, op1, const1_rtx));
        }
      else
        emit_insn (store_unaligned (op0, op1));
    }
  else
    gcc_unreachable ();
}
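/* Example (illustrative): with -mavx256-split-unaligned-load, an
   unaligned V8SFmode load is emitted as a 16-byte load plus a lane
   insert, roughly

        vmovups (%rax), %xmm0
        vinsertf128     $1, 16(%rax), %ymm0, %ymm0

   instead of one 32-byte vmovups.  */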
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, orig_op0 = NULL_RTX, m;
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);

  op0 = operands[0];
  op1 = operands[1];

  if (GET_MODE_SIZE (mode) == 64)
    {
      switch (GET_MODE_CLASS (mode))
        {
        case MODE_VECTOR_INT:
        case MODE_INT:
          if (GET_MODE (op0) != V16SImode)
            {
              if (!MEM_P (op0))
                {
                  orig_op0 = op0;
                  op0 = gen_reg_rtx (V16SImode);
                }
              else
                op0 = gen_lowpart (V16SImode, op0);
            }
          op1 = gen_lowpart (V16SImode, op1);
          /* FALLTHRU */

        case MODE_VECTOR_FLOAT:
          switch (GET_MODE (op0))
            {
            default:
              gcc_unreachable ();
            case V16SImode:
              load_unaligned = gen_avx512f_loaddquv16si;
              store_unaligned = gen_avx512f_storedquv16si;
              break;
            case V16SFmode:
              load_unaligned = gen_avx512f_loadups512;
              store_unaligned = gen_avx512f_storeups512;
              break;
            case V8DFmode:
              load_unaligned = gen_avx512f_loadupd512;
              store_unaligned = gen_avx512f_storeupd512;
              break;
            }

          if (MEM_P (op1))
            emit_insn (load_unaligned (op0, op1));
          else if (MEM_P (op0))
            emit_insn (store_unaligned (op0, op1));
          else
            gcc_unreachable ();
          if (orig_op0)
            emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
          break;

        default:
          gcc_unreachable ();
        }

      return;
    }

  if (TARGET_AVX
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
        {
        case MODE_VECTOR_INT:
        case MODE_INT:
          if (GET_MODE (op0) != V32QImode)
            {
              if (!MEM_P (op0))
                {
                  orig_op0 = op0;
                  op0 = gen_reg_rtx (V32QImode);
                }
              else
                op0 = gen_lowpart (V32QImode, op0);
            }
          op1 = gen_lowpart (V32QImode, op1);
          /* FALLTHRU */

        case MODE_VECTOR_FLOAT:
          ix86_avx256_split_vector_move_misalign (op0, op1);
          if (orig_op0)
            emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
          break;

        default:
          gcc_unreachable ();
        }

      return;
    }

  if (MEM_P (op1))
    {
      /* Normal *mov<mode>_internal pattern will handle
         unaligned loads just fine if misaligned_operand
         is true, and without the UNSPEC it can be combined
         with arithmetic instructions.  */
      if (TARGET_AVX
          && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
          && misaligned_operand (op1, GET_MODE (op1)))
        emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          if (GET_MODE (op0) != V16QImode)
            {
              orig_op0 = op0;
              op0 = gen_reg_rtx (V16QImode);
            }
          op1 = gen_lowpart (V16QImode, op1);
          /* We will eventually emit movups based on insn attributes.  */
          emit_insn (gen_sse2_loaddquv16qi (op0, op1));
          if (orig_op0)
            emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
        }
      else if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          if (TARGET_AVX
              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              /* We will eventually emit movups based on insn attributes.  */
              emit_insn (gen_sse2_loadupd (op0, op1));
              return;
            }

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_clobber (op0);
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          rtx t;

          if (TARGET_AVX
              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              if (GET_MODE (op0) != V4SFmode)
                {
                  orig_op0 = op0;
                  op0 = gen_reg_rtx (V4SFmode);
                }
              op1 = gen_lowpart (V4SFmode, op1);
              emit_insn (gen_sse_loadups (op0, op1));
              if (orig_op0)
                emit_move_insn (orig_op0,
                                gen_lowpart (GET_MODE (orig_op0), op0));
              return;
            }

          if (mode != V4SFmode)
            t = gen_reg_rtx (V4SFmode);
          else
            t = op0;

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (t, CONST0_RTX (V4SFmode));
          else
            emit_clobber (t);

          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (t, t, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (t, t, m));
          if (mode != V4SFmode)
            emit_move_insn (op0, gen_lowpart (mode, t));
        }
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          /* We will eventually emit movups based on insn attributes.  */
          emit_insn (gen_sse2_storedquv16qi (op0, op1));
        }
      else if (TARGET_SSE2 && mode == V2DFmode)
        {
          if (TARGET_AVX
              || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            /* We will eventually emit movups based on insn attributes.  */
            emit_insn (gen_sse2_storeupd (op0, op1));
          else
            {
              m = adjust_address (op0, DFmode, 0);
              emit_insn (gen_sse2_storelpd (m, op1));
              m = adjust_address (op0, DFmode, 8);
              emit_insn (gen_sse2_storehpd (m, op1));
            }
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

          if (TARGET_AVX
              || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
              || optimize_insn_for_size_p ())
            {
              op0 = gen_lowpart (V4SFmode, op0);
              emit_insn (gen_sse_storeups (op0, op1));
            }
          else
            {
              m = adjust_address (op0, V2SFmode, 0);
              emit_insn (gen_sse_storelps (m, op1));
              m = adjust_address (op0, V2SFmode, 8);
              emit_insn (gen_sse_storehps (m, op1));
            }
        }
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);
          src1 = src2;
        }
      else if (rtx_equal_p (dst, src1))
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
           && code == PLUS
           && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
                                     rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;

  if (GET_CODE (operands[1]) == SUBREG)
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (GET_CODE (operands[2]) == SUBREG)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
          || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
              && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;
      switch (GET_MODE (SUBREG_REG (op1)))
        {
        case V4SFmode:
        case V8SFmode:
        case V2DFmode:
        case V4DFmode:
          dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
          if (GET_CODE (op2) == CONST_VECTOR)
            {
              op2 = gen_lowpart (GET_MODE (dst), op2);
              op2 = force_reg (GET_MODE (dst), op2);
            }
          else
            {
              op1 = operands[1];
              op2 = SUBREG_REG (operands[2]);
              if (!nonimmediate_operand (op2, GET_MODE (dst)))
                op2 = force_reg (GET_MODE (dst), op2);
            }
          op1 = SUBREG_REG (op1);
          if (!nonimmediate_operand (op1, GET_MODE (dst)))
            op1 = force_reg (GET_MODE (dst), op1);
          emit_insn (gen_rtx_SET (VOIDmode, dst,
                                  gen_rtx_fmt_ee (code, GET_MODE (dst),
                                                  op1, op2)));
          emit_move_insn (operands[0], gen_lowpart (mode, dst));
          return;
        default:
          break;
        }
    }

  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_fmt_ee (code, mode, operands[1],
                                          operands[2])));
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
                         rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
            && satisfies_constraint_L (src2));

  return true;
}
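/* Illustrative sketch added for this edit, not part of GCC: why the AND
   special case above is safe.  An AND with 0xff/0xffff/0xffffffff
   (constraint L) behaves exactly like a zero-extending move, as the scalar
   identities below show; names are invented for the example.  */
#if 0
#include <stdint.h>
#include <assert.h>

static void
and_is_zero_extend (uint32_t x)
{
  assert ((x & 0xffu) == (uint32_t) (uint8_t) x);     /* andb == movzbl */
  assert ((x & 0xffffu) == (uint32_t) (uint16_t) x);  /* andw == movzwl */
}
#endif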
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
                    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
                                 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
                               gen_rtx_LABEL_REF (VOIDmode, qimode_label),
                               pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
                       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
         of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
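/* Illustrative sketch added for this edit, not part of GCC: the range test
   and the 8-bit divide used by the splitter above, in scalar C.  If
   (dividend | divisor) has no bits above bit 7, one unsigned 8-bit divide
   yields both quotient (AL) and remainder (AH).  Names are invented.  */
#if 0
#include <stdint.h>

static int
split_divmod_u32 (uint32_t a, uint32_t b,     /* b must be nonzero */
                  uint32_t *quo, uint32_t *rem)
{
  /* "test $-0x100, reg" on a|b: both values must fit in [0, 255].  */
  if (((a | b) & ~0xffu) != 0)
    return 0;                        /* caller falls back to full divide */

  uint16_t t = (uint16_t) a;         /* AX = dividend */
  *quo = (uint32_t) (t / (uint8_t) b);   /* AL after div */
  *rem = (uint32_t) (t % (uint8_t) b);   /* AH after div */
  return 1;
}
#endif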
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
          && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
        return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
        && !DF_REF_IS_ARTIFICIAL (*def_rec)
        && (regno1 == DF_REF_REGNO (*def_rec)
            || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx insn, int distance,
                               rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              if (recog_memoized (prev) < 0
                  || get_attr_type (prev) != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx insn, int distance, rtx start,
                        bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
        /* If insn and start belong to the same bb, set prev to insn,
           so the call to increase_distance will increase the distance
           between insns by 1.  */
        prev = insn;
    }

  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Silvermont if using a 2-source or 3-source LEA for
     non-destructive destination purposes, or due to wanting
     ability to use SCALE, the use of LEA is justified.  */
  if (ix86_tune == PROCESSOR_SLM)
    {
      if (has_scale)
        return true;
      if (split_cost < 1)
        return false;
      if (regno0 == regno1 || regno0 == regno2)
        return false;
      return true;
    }

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
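/* Illustrative sketch added for this edit, not part of GCC: a simplified
   scalar restatement of the decision above, with the Silvermont special
   case and the priority macro left out.  Distances are in half-cycles,
   -1 meaning "not found"; names are invented for the example.  */
#if 0
static int
lea_outperforms_sketch (int dist_define, int dist_use,
                        int split_cost, int prefer_lea_default)
{
  enum { MAX_STALL = 3 };

  if (dist_define < 0 || dist_define >= MAX_STALL)
    {
      /* No problematic ALU definition nearby.  */
      if (dist_use < 0 && split_cost == 0)
        return prefer_lea_default;        /* pure tie */
      return 1;
    }

  /* Charge the split sequence against the definition distance.  */
  dist_define += split_cost;

  if (dist_use < 0)                       /* result never feeds an AGU */
    return dist_define > MAX_STALL;

  return dist_define >= dist_use;         /* nearer hazard wins */
}
#endif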
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
        {
          for (use = DF_INSN_USES (insn); *use; use++)
            if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
              return false;

          if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
            return true;
        }

      if (insn == BB_END (bb))
        break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out (bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
                                parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
                 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx insn, int regno1, int regno2)
{
  rtx prev = insn;
  rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
        {
          prev = PREV_INSN (prev);
          continue;
        }
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
        return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
        return false;
      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
        {
          /* If we have a case r1 = r1 + C * r1 then we
             should use multiplication which is very
             expensive.  Assume cost model is wrong if we
             have such case here.  */
          gcc_assert (regno2 != regno0);

          for (adds = parts.scale; adds > 0; adds--)
            ix86_emit_binop (PLUS, mode, target, parts.index);
        }
      else
        {
          /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

          /* Use shift for scaling.  */
          ix86_emit_binop (ASHIFT, mode, target,
                           GEN_INT (exact_log2 (parts.scale)));

          if (parts.base)
            ix86_emit_binop (PLUS, mode, target, parts.base);

          if (parts.disp && parts.disp != const0_rtx)
            ix86_emit_binop (PLUS, mode, target, parts.disp);
        }
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert (parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
        {
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
        }
      else if (!parts.index)
        {
          if (regno0 != regno1)
            emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
        }
      else
        {
          if (regno0 == regno1)
            tmp = parts.index;
          else if (regno0 == regno2)
            tmp = parts.base;
          else
            {
              rtx tmp1;

              /* Find better operand for SET instruction, depending
                 on which definition is farther from the insn.  */
              if (find_nearest_reg_def (insn, regno1, regno2))
                tmp = parts.index, tmp1 = parts.base;
              else
                tmp = parts.base, tmp1 = parts.index;

              emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

              if (parts.disp && parts.disp != const0_rtx)
                ix86_emit_binop (PLUS, mode, target, parts.disp);

              ix86_emit_binop (PLUS, mode, target, tmp1);
              return;
            }

          ix86_emit_binop (PLUS, mode, target, tmp);
        }

      if (parts.disp && parts.disp != const0_rtx)
        ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
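/* Illustrative sketch added for this edit, not part of GCC: the address
   arithmetic that the split above re-expresses with ALU instructions.
   For "lea disp(base,index,scale), dest" with all registers distinct the
   emitted sequence is mov/shl/add/add.  Names are invented.  */
#if 0
#include <stdint.h>

static uintptr_t
split_lea_sketch (uintptr_t base, uintptr_t index,
                  unsigned scale /* 1, 2, 4 or 8 */, intptr_t disp)
{
  uintptr_t dest = index;       /* mov: non-destructive destination */
  unsigned shift = 0;
  while ((1u << shift) < scale) /* exact_log2 of the scale */
    shift++;
  dest <<= shift;               /* shl: scaling */
  dest += base;                 /* add: combine base and index */
  dest += (uintptr_t) disp;     /* add: displacement immediate */
  return dest;
}
#endif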
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
        {
          /* Add check since it can be invoked before register
             allocation in pre-reload schedule.  */
          if (reload_completed
              && true_regnum (set_dest) == true_regnum (shift_count))
            return true;
          else if (REGNO (set_dest) == REGNO (shift_count))
            return true;
        }
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && !rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
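/* Illustrative sketch added for this edit, not part of GCC: the arithmetic
   behind the splitter above, in scalar C.  SSE only converts to signed
   integers, so values >= 2**31 are reduced by 2**31 before the signed
   conversion and the sign bit is xored back in afterwards (the splitter
   does this branchlessly with a compare mask).  Names are invented.  */
#if 0
#include <stdint.h>

static uint32_t
double_to_u32_sketch (double value)   /* value assumed in [0, 2**32) */
{
  const double two31 = 2147483648.0;  /* 0x1p31 */
  if (value < two31)
    return (uint32_t) (int32_t) value;              /* plain cvttsd2si */
  /* Subtract 2**31, convert, then flip the sign bit back on.  */
  return (uint32_t) (int32_t) (value - two31) ^ 0x80000000u;
}
#endif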
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
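/* Illustrative sketch added for this edit, not part of GCC: the exponent
   trick used above, in scalar C.  Gluing exponent 0x433 (2**52) onto the
   low 32 bits and 0x453 (2**84) onto the high 32 bits produces two biased
   doubles; subtracting the biases and adding the halves reconstructs the
   unsigned 64-bit value with a single rounding.  Names are invented.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
u64_to_double_sketch (uint64_t v)
{
  uint64_t lo_bits = ((uint64_t) 0x43300000u << 32) | (uint32_t) v;
  uint64_t hi_bits = ((uint64_t) 0x45300000u << 32) | (uint32_t) (v >> 32);
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);  /* == 0x1.0p52 + low32 */
  memcpy (&hi, &hi_bits, sizeof hi);  /* == 0x1.0p84 + high32 * 2**32 */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif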
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
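/* Illustrative sketch added for this edit, not part of GCC: the same bias
   idea in scalar C.  The unsigned value is shifted into signed range by a
   wrapping subtraction of 2**31, converted as a signed int, and rebiased
   by adding 2**31.0.  Names are invented.  */
#if 0
#include <stdint.h>

static double
u32_to_double_sketch (uint32_t v)
{
  int32_t biased = (int32_t) (v - 0x80000000u); /* v - 2**31, wraps */
  return (double) biased + 2147483648.0;        /* cvtsi2sd + addsd */
}
#endif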
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
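/* Illustrative sketch added for this edit, not part of GCC: the 16-bit
   split above in scalar C.  Both halves are at most 0xffff, so each
   converts to float exactly; the only rounding happens in the final
   multiply-add.  The same split drives the vector variant below.  */
#if 0
#include <stdint.h>

static float
u32_to_float_sketch (uint32_t v)
{
  float lo = (float) (v & 0xffffu);  /* exact */
  float hi = (float) (v >> 16);      /* exact */
  return hi * 65536.0f + lo;         /* one rounding step */
}
#endif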
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V64QImode:
    case V32QImode:
    case V16QImode:
    case V32HImode:
    case V16HImode:
    case V8HImode:
    case V16SImode:
    case V8SImode:
    case V4SImode:
    case V8DImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V16SFmode:
    case V8SFmode:
    case V4SFmode:
    case V8DFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V16SImode:
    case V16SFmode:
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V8DImode:
    case V4DImode:
    case V2DImode:
    case V8DFmode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT) 1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT) 1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
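/* Illustrative sketch added for this edit, not part of GCC: what the
   generated masks do.  With mask = sign bit (and the inverted mask for
   ABS), SSE implements neg/abs as xorps/andps against these constants,
   as the scalar bit operations below show.  Names are invented.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
fneg_sketch (float x)                /* xorps with 0x80000000 */
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u ^= 0x80000000u;                  /* flip the sign bit */
  memcpy (&x, &u, sizeof x);
  return x;
}

static float
fabs_sketch (float x)                /* andps with ~0x80000000 */
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u &= 0x7fffffffu;                  /* clear the sign bit */
  memcpy (&x, &u, sizeof x);
  return x;
}
#endif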
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else						/* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else						/* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
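/* Illustrative sketch added for this edit, not part of GCC: the mask
   algebra shared by the two splitters above.  copysign(x, y) keeps the
   magnitude bits of x and the sign bit of y.  Names are invented.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_sketch (double x, double y)
{
  const uint64_t sign = 0x8000000000000000ull;  /* the "mask" constant */
  uint64_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & ~sign)     /* andpd with nmask: magnitude of x */
       | (uy & sign);   /* andpd with mask, then orpd: sign of y */
  memcpy (&x, &ux, sizeof x);
  return x;
}
#endif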
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
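/* Illustrative use: an insn whose first SET destination is CCNOmode matches
   a REQ_MODE of CCNOmode, or of CCmode provided the comparison is against
   zero, per the first case of the switch above.  */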
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:                   /* ZF=0 */
    case NE:                   /* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:                  /* CF=0 */
    case LTU:                  /* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
    case GTU:                  /* CF=0 & ZF=0 */
    case LEU:                  /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:                   /* SF=OF   or   SF=0 */
    case LT:                   /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but for which we miss a jump instruction,
         so we need to use relational tests against the overflow
         flag, which thus needs to be zero.  */
    case GT:                   /* ZF=0 & SF=OF */
    case LE:                   /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp patterns do (use flags), and combine may ask us
         for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
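/* Illustrative example: for an overflow check like "if (a + b < a)" the
   comparison reaching here is (ltu (plus a b) a); op0 is a PLUS whose first
   operand equals op1, so CCCmode is chosen and only the carry flag is
   materialized.  */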
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode: case CCGCmode: case CCGOCmode: case CCNOmode:
    case CCAmode: case CCCmode: case CCOmode: case CCSmode: case CCZmode:
      switch (m2)
        {
        default:
          gcc_unreachable ();

        case CCmode: case CCGCmode: case CCGOCmode: case CCNOmode:
        case CCAmode: case CCCmode: case CCOmode: case CCSmode: case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:                   /* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:                   /* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:                 /* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:                 /* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE: case UNLT: case LTGT: case GT: case GE:
    case UNORDERED: case ORDERED: case UNEQ:
      arith_cost = 4;
      break;
    case LT: case NE: case EQ: case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE: case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */
  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT: return GTU;
    case GE: return GEU;
    case ORDERED:
    case UNORDERED: return code;
    case UNEQ: return EQ;
    case UNLT: return LTU;
    case UNLE: return LEU;
    case LTGT: return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
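/* For reference: after fnstsw the FPU condition codes sit in AH as
   C0 -> bit 0 (0x01), C2 -> bit 2 (0x04) and C3 -> bit 6 (0x40).  Thus the
   constant 0x45 used above tests C0|C2|C3 at once, while 0x04 alone tests
   C2, which is set for unordered (NaN) operands.  */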
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
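/* Illustrative example: for "a == b" on DImode with -m32 the equality path
   above emits
     t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b));
     if (t == 0) goto label;
   trading one extra ALU instruction for a single conditional branch.  */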
/* Split branch based on floating point condition.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through the special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  the following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require the zero flag; swap operands so
         they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when
         we decide to expand the comparison using arithmetic, which is
         not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause a constant to appear as the first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
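/* Illustrative example: "a == 0" is rewritten above into the unsigned
   "(unsigned) a < 1", whose compare sets the carry flag exactly when a is
   zero; callers such as ix86_expand_int_movcc can then consume the flag
   with a single sbb instruction.  */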
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by
         using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing an unordered compare to a normal compare,
                 which is not valid in general (we may convert a non-trapping
                 condition to a trapping one), however on i386 we currently
                 emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          nops = 0;
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing an unordered compare to a normal
                     compare, which is not valid in general (we may convert
                     a non-trapping condition to a trapping one), however
                     on i386 we currently emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1      (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
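/* Illustrative example: after the GEU normalization above, the carry-flag
   path emits, roughly,
     cmpl  op1, op0
     sbbl  x, x              ; x = -1 or 0 from the carry
     andl  $(cf - ct), x
     addl  $ct, x            ; x = cf or ct
   which is the branch-free selection the size comments describe.  */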
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
      rtx d = dest;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              if (mode != V16QImode)
                d = gen_reg_rtx (V16QImode);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              if (mode != V32QImode)
                d = gen_reg_rtx (V32QImode);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        {
          emit_insn (gen (d, op_false, op_true, cmp));
          if (d != dest)
            emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
        }
      else
        {
          rtx t2, t3;

          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
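/* The final fallback is the classic mask/merge idiom: conceptually
     t2   = op_true  & cmp
     t3   = op_false & ~cmp
     dest = t2 | t3
   which assumes CMP is an all-ones/all-zeros per-element mask, as produced
   by ix86_expand_sse_cmp.  */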
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0,
                                         GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0,
                                         GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
                {
                  rtx t1, t2, mask;
                  rtx (*gen_sub3) (rtx, rtx, rtx);

                  switch (mode)
                    {
                    case V8SImode: gen_sub3 = gen_subv8si3; break;
                    case V4DImode: gen_sub3 = gen_subv4di3; break;
                    case V4SImode: gen_sub3 = gen_subv4si3; break;
                    case V2DImode: gen_sub3 = gen_subv2di3; break;
                    default:
                      gcc_unreachable ();
                    }
                  /* Subtract (-(INT MAX) - 1) from both operands to make
                     them signed.  */
                  mask = ix86_build_signbit_mask (mode, true, false);
                  t1 = gen_reg_rtx (mode);
                  emit_insn (gen_sub3 (t1, cop0, mask));

                  t2 = gen_reg_rtx (mode);
                  emit_insn (gen_sub3 (t2, cop1, mask));

                  cop0 = t1;
                  cop1 = t2;
                  code = GT;
                }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
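/* Illustrative note: the GTU trick above uses the identity
     a >u b  ==  (a - 0x80000000) >s (b - 0x80000000)
   per 32-bit element; biasing both operands by the sign-bit constant lets
   the only available signed compare (e.g. pcmpgtd) compute the unsigned
   relation.  */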
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can, after preparing suitable
             masks, use vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1   = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
               t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = validize_mem (force_const_mem (maskmode, vt));
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_reg_rtx (mode);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            {
              emit_insn (gen_avx2_permvarv8si (target, op0, mask));
              if (target != operands[0])
                emit_move_insn (operands[0],
                                gen_lowpart (GET_MODE (operands[0]), target));
            }
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SImode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SImode);
          mask = gen_lowpart (V4SImode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          t5 = gen_reg_rtx (V4DImode);
          emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          t6 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
                                                gen_lowpart (V32QImode, t6)));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              t7 = gen_reg_rtx (V4DImode);
              emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1,
                                        gen_lowpart (V32QImode, t7)));
              if (target != operands[0])
                emit_move_insn (operands[0],
                                gen_lowpart (GET_MODE (operands[0]), target));
              return;
            }

          t4 = gen_reg_rtx (V32QImode);
          /* Similar to the one_operand_shuffle code above, just repeated
             twice, once for each operand.  The merge_two: code below will
             merge the two results together.  */
          emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
                                            gen_lowpart (V32QImode, t6)));
          emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
                                            gen_lowpart (V32QImode, t6)));
          emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
          emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
          t7 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          t8 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
          emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
          t1 = t4;
          t2 = t3;
          goto merge_two;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);

  if (TARGET_XOP)
    {
      if (GET_MODE (target) != V16QImode)
        target = gen_reg_rtx (V16QImode);
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
  else if (one_operand_shuffle)
    {
      if (GET_MODE (target) != V16QImode)
        target = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At which point the masking that expand_int_vcond
             does will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      if (GET_MODE (target) != mode)
        target = gen_reg_rtx (mode);
      xops[0] = target;
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
}
/* Unpack SRC into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (V1TImode);
	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	  tmp = gen_lowpart (imode, tmp);
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);
      rtx tmp2;

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      tmp2 = gen_reg_rtx (imode);
      emit_insn (unpack (tmp2, src, tmp));
      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
    }
}
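/* Sketch of the intended semantics (added comment; the lane diagram is
   illustrative only).  For a V8HImode SRC with UNSIGNED_P and !HIGH_P on
   SSE4.1 the expansion above emits a single pmovzxwd-style extension:
       src:   h0 h1 h2 h3 h4 h5 h6 h7     (eight 16-bit lanes)
       dest:  (unsigned) h0 .. (unsigned) h3   (four 32-bit lanes)
   Pre-SSE4.1, the same result comes from interleaving SRC with either a
   zero vector (unsigned) or its own sign mask computed via the GT
   comparison above, using punpcklwd/punpckhwd-class patterns.  */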
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
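/* Illustrative sketch (an addition, not from the original source): for
       r = (a < b) ? r + 1 : r;
   with unsigned SImode operands, the expansion above would typically
   produce something like
       cmpl %ebx, %eax      # sets the carry flag for a < b
       adcl $0, %ecx        # r += carry
   and the conditional-decrement variant pairs the compare with sbb
   instead.  The registers are hypothetical; the cmp/adc (or cmp/sbb)
   pairing driven by the carry flag is the point.  */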
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, &r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
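/* Worked example (added comment; the values are illustrative): on
   !TARGET_64BIT a DFmode constant such as 1.0 is decomposed by
   real_to_target into two SImode immediates, parts[0] = 0x00000000 and
   parts[1] = 0x3ff00000, the little-endian low and high words of the
   IEEE double 0x3ff0000000000000.  An XFmode value would additionally
   fill parts[2] with the 16-bit sign/exponent word.  */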
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
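/* Illustrative collision case (added comment; registers hypothetical):
   moving a DImode value on ia32 where the first destination part is
   %eax and the source is MEM(%eax) would clobber the address register
   on the first partial move.  The logic above either reorders the
   partial moves, or, with more than one colliding part, loads the
   source address into the last destination part with a single SET
   (an lea in effect) so only one colliding move remains.  */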
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
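/* Example of the cost trade-off above (added comment): a left shift by 2
   can be emitted as two "add reg, reg" instructions whenever
   2 * add-cost does not exceed shift_const-cost for the tuned CPU, since
   adding a register to itself doubles it; under -Os the single shl is
   always preferred.  The counts here merely restate the condition above,
   they are not measured figures.  */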
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
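/* Rough sketch of the emitted sequence for a variable DImode shift on
   ia32 (added comment; registers are hypothetical):
       shld %cl, %eax, %edx    # high = high:low << (cl mod 32), top half
       sal  %cl, %eax          # low <<= cl
   followed by x86_shift*_adj_1 (cmov plus a scratch register) or
   x86_shift*_adj_2 (a branch) to handle cl >= 32, where the true result
   needs high = low << (cl - 32) and low = 0.  */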
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
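/* Analogous sketch for the arithmetic right shift (added comment;
   registers hypothetical):
       shrd %cl, %edx, %eax    # low = high:low >> cl, bottom half
       sar  %cl, %edx          # high >>= cl, sign-filling
   with the adj_1/adj_3 helpers fixing up cl >= 32 by moving high into
   low and replacing high with a copy of the sign, i.e. high >> 31.  */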
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_int_reg_note (insn, REG_BR_PROB, prob);
}

/* Helper function for the string operations below.  Test whether
   VARIABLE is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* Copy the address to a Pmode register.  This is used for x32 to
   truncate DImode TLS address to a SImode register.  */

static rtx
ix86_copy_addr_to_reg (rtx addr)
{
  if (GET_MODE (addr) == Pmode)
    return copy_addr_to_reg (addr);
  else
    {
      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
      return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
    }
}
/* When ISSETMEM is FALSE, output a simple loop to move memory from SRCPTR
   to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is
   COUNT, specified in bytes.  When ISSETMEM is TRUE, output the equivalent
   loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size, bool issetmem)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in this case we'll need to compute
     smallest power of two, containing in PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (!issetmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (!issetmem)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
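/* Shape of the loop emitted above, in pseudocode (added comment; this
   restates the emit calls rather than quoting real compiler output):
       size = count & ~(piece_size - 1);
       iter = 0;
     top:
       copy/set piece_size bytes at dest + iter (and src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;    // pointers left ready for epilogue
     out:
   The predict_jump calls encode the expected trip count for the branch
   predictor notes.  */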
/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
   When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
   When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
   ORIG_VALUE is the original value passed to memset to fill the memory with.
   Other arguments have same meaning as for previous function.  */

static void
expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
			      rtx destptr, rtx srcptr, rtx value, rtx orig_value,
			      rtx count,
			      enum machine_mode mode, bool issetmem)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If possible, it is shorter to use rep movs.
     TODO: Maybe it is better to move this logic to decide_alg.  */
  if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
      && (!issetmem || orig_value == const0_rtx))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);

  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);

  if (issetmem)
    {
      value = force_reg (mode, gen_lowpart (mode, value));
      emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
    }
  else
    {
      if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
      if (mode != QImode)
	{
	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
	}
      else
	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
      if (CONST_INT_P (count))
	{
	  rounded_count = (INTVAL (count)
			   & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
	  srcmem = shallow_copy_rtx (srcmem);
	  set_mem_size (srcmem, rounded_count);
	}
      else if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);

      emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			      destexp, srcexp));
    }
}
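/* Note (added comment): DESTEXP/SRCEXP describe the final pointer
   values, e.g. dest + (count >> 2) * 4 for SImode, which is how the
   rep-insn patterns in i386.md model the pointer side effect of, say,
       rep movsd        # esi/edi each advance by 4 * ecx
   That instruction is illustrative; the actual variant chosen depends
   on MODE and ISSETMEM.  */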
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
  code = optab_handler (mov_optab, move_mode);
  while (code == CODE_FOR_nothing && piece_size > 1)
    {
      piece_size >>= 1;
      move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
      code = optab_handler (mov_optab, move_mode);
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      move_mode = mode_for_vector (word_mode, nunits);
      code = optab_handler (mov_optab, move_mode);
      if (code == CODE_FOR_nothing)
	{
	  move_mode = word_mode;
	  piece_size = GET_MODE_SIZE (move_mode);
	  code = optab_handler (mov_optab, move_mode);
	}
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
	 a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
					  piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
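/* Example (added comment; the sizes are hypothetical): with
   SIZE_TO_MOVE == 16 and SSE enabled, PIECE_SIZE starts at 16,
   mode_for_size yields TImode, and the vector-mode fallback above turns
   that into a word_mode-element vector (e.g. V4SImode on ia32), so a
   single 16-byte load/store pair through TEMPREG performs the copy.  */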
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
	{
	  if (epilogue_size & i)
	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4, false);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
   with value PROMOTED_VAL.
   Return value is updated DST.  */
static rtx
emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, adjust;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  move_mode = GET_MODE (promoted_val);
  if (move_mode == VOIDmode)
    move_mode = QImode;
  if (size_to_move < GET_MODE_SIZE (move_mode))
    {
      move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
      promoted_val = gen_lowpart (move_mode, promoted_val);
    }
  piece_size = GET_MODE_SIZE (move_mode);
  code = optab_handler (mov_optab, move_mode);
  gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      if (piece_size <= GET_MODE_SIZE (word_mode))
	{
	  emit_insn (gen_strset (destptr, dst, promoted_val));
	  dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					      piece_size);
	  continue;
	}

      emit_insn (GEN_FCN (code) (dst, promoted_val));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
    }

  /* Update DST rtx.  */
  return dst;
}
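/* Example (added comment; the sizes are hypothetical): an 8-byte
   promoted value with SIZE_TO_MOVE == 12 would trip the assertion above
   (12 % 8 != 0); callers therefore split residues into power-of-two
   pieces, e.g. one 8-byte emit_memset followed by a 4-byte one with
   PROMOTED_VAL narrowed via gen_lowpart, as the epilogue loops do.  */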
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2, true);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
			rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
	{
	  if (epilogue_size & i)
	    {
	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
		destmem = emit_memset (destmem, destptr, vec_value, i);
	      else
		destmem = emit_memset (destmem, destptr, value, i);
	    }
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
   DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
   Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
   ignored.
   Return value is updated DESTMEM.  */
static rtx
expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx vec_value, rtx count, int align,
			       int desired_alignment, bool issetmem)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
	{
	  rtx label = ix86_expand_aligntest (destptr, i, false);
	  if (issetmem)
	    {
	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
		destmem = emit_memset (destmem, destptr, vec_value, i);
	      else
		destmem = emit_memset (destmem, destptr, value, i);
	    }
	  else
	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	  ix86_adjust_counter (count, i);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
	}
    }
  return destmem;
}
/* Test if COUNT&SIZE is nonzero and if so, expand a movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
static void
expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr,
			       rtx value, rtx vec_value,
			       rtx count, int size,
			       rtx done_label, bool issetmem)
{
  rtx label = ix86_expand_aligntest (count, size, false);
  enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
  rtx modesize;
  int n;

  /* If we do not have vector value to copy, we must reduce size.  */
  if (issetmem)
    {
      if (!vec_value)
	{
	  if (GET_MODE (value) == VOIDmode && size > 8)
	    mode = Pmode;
	  else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
	    mode = GET_MODE (value);
	}
      else
	mode = GET_MODE (vec_value), value = vec_value;
    }
  else
    {
      /* Choose appropriate vector mode.  */
      if (size >= 32)
	mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
      else if (size >= 16)
	mode = TARGET_SSE ? V16QImode : DImode;
      srcmem = change_address (srcmem, mode, srcptr);
    }
  destmem = change_address (destmem, mode, destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  gcc_assert (GET_MODE_SIZE (mode) <= size);
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
	emit_move_insn (destmem, gen_lowpart (mode, value));
      else
	{
	  emit_move_insn (destmem, srcmem);
	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
	}
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }

  destmem = offset_address (destmem, count, 1);
  destmem = offset_address (destmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
			    GET_MODE_SIZE (mode));
  if (issetmem)
    emit_move_insn (destmem, gen_lowpart (mode, value));
  else
    {
      srcmem = offset_address (srcmem, count, 1);
      srcmem = offset_address (srcmem, GEN_INT (-size - GET_MODE_SIZE (mode)),
			       GET_MODE_SIZE (mode));
      emit_move_insn (destmem, srcmem);
    }

  emit_jump_insn (gen_jump (done_label));
  emit_barrier ();

  emit_label (label);
  LABEL_NUSES (label) = 1;
}
/* Handle small memcpy (up to SIZE, which is supposed to be a small power
   of 2), and get ready for the main memcpy loop by copying the initial
   DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
   DESTPTR/SRCPTR/COUNT in a way we can proceed with a loop copying SIZE
   bytes at once.  Do moves in MODE.
   DONE_LABEL is a label after the whole copying sequence.  The label is
   created on demand if *DONE_LABEL is NULL.
   MIN_SIZE is minimal size of block copied.  This value gets adjusted for
   new bounds after the initial copies.

   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates
   whether we will dispatch to a library call for large blocks.

   In pseudocode we do:

   if (COUNT < SIZE)
     {
       Assume that SIZE is 4.  Bigger sizes are handled analogously.
       if (COUNT & 4)
	 {
	   copy 4 bytes from SRCPTR to DESTPTR
	   copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
	   goto done_label
	 }
       if (!COUNT)
	 goto done_label;
       copy 1 byte from SRCPTR to DESTPTR
       if (COUNT & 2)
	 {
	   copy 2 bytes from SRCPTR to DESTPTR
	   copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
	 }
     }
   else
     {
       copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
       copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE

       OLD_DESTPTR = DESTPTR;
       Align DESTPTR up to DESIRED_ALIGN
       SRCPTR += DESTPTR - OLD_DESTPTR
       COUNT -= DESTPTR - OLD_DESTPTR
       if (DYNAMIC_CHECK)
	 Round COUNT down to multiple of SIZE
       << optional caller supplied zero size guard is here >>
       << optional caller supplied dynamic check is here >>
       << caller supplied main copy loop is here >>
     }
   done_label:
  */
static void
expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
							    rtx *destptr, rtx *srcptr,
							    enum machine_mode mode,
							    rtx value, rtx vec_value,
							    rtx *count,
							    rtx *done_label,
							    int size,
							    int desired_align,
							    int align,
							    unsigned HOST_WIDE_INT *min_size,
							    bool dynamic_check,
							    bool issetmem)
{
  rtx loop_label = NULL, label;
  int n;
  rtx modesize;
  int prolog_size = 0;
  rtx mode_value;

  /* Chose proper value to copy.  */
  if (issetmem && VECTOR_MODE_P (mode))
    mode_value = vec_value;
  else
    mode_value = value;
  gcc_assert (GET_MODE_SIZE (mode) <= size);

  /* See if block is big or small, handle small blocks.  */
  if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
    {
      int size2 = size;
      loop_label = gen_label_rtx ();

      if (!*done_label)
	*done_label = gen_label_rtx ();

      emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
			       1, loop_label);
      size2 >>= 1;

      /* Handle sizes > 3.  */
      for (;size2 > 2; size2 >>= 1)
	expand_small_movmem_or_setmem (destmem, srcmem,
				       *destptr, *srcptr,
				       value, vec_value,
				       *count,
				       size2, *done_label, issetmem);
      /* Nothing to copy?  Jump to DONE_LABEL if so.  */
      emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
			       1, *done_label);

      /* Do a byte copy.  */
      destmem = change_address (destmem, QImode, *destptr);
      if (issetmem)
	emit_move_insn (destmem, gen_lowpart (QImode, value));
      else
	{
	  srcmem = change_address (srcmem, QImode, *srcptr);
	  emit_move_insn (destmem, srcmem);
	}

      /* Handle sizes 2 and 3.  */
      label = ix86_expand_aligntest (*count, 2, false);
      destmem = change_address (destmem, HImode, *destptr);
      destmem = offset_address (destmem, *count, 1);
      destmem = offset_address (destmem, GEN_INT (-2), 2);
      if (issetmem)
	emit_move_insn (destmem, gen_lowpart (HImode, value));
      else
	{
	  srcmem = change_address (srcmem, HImode, *srcptr);
	  srcmem = offset_address (srcmem, *count, 1);
	  srcmem = offset_address (srcmem, GEN_INT (-2), 2);
	  emit_move_insn (destmem, srcmem);
	}

      emit_label (label);
      LABEL_NUSES (label) = 1;
      emit_jump_insn (gen_jump (*done_label));
      emit_barrier ();
    }
  else
    gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
		|| UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);

  /* Start memcpy for COUNT >= SIZE.  */
  if (loop_label)
    {
      emit_label (loop_label);
      LABEL_NUSES (loop_label) = 1;
    }

  /* Copy first desired_align bytes.  */
  if (!issetmem)
    srcmem = change_address (srcmem, mode, *srcptr);
  destmem = change_address (destmem, mode, *destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  for (n = 0; prolog_size < desired_align - align; n++)
    {
      if (issetmem)
	emit_move_insn (destmem, mode_value);
      else
	{
	  emit_move_insn (destmem, srcmem);
	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
	}
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
      prolog_size += GET_MODE_SIZE (mode);
    }

  /* Copy last SIZE bytes.  */
  destmem = offset_address (destmem, *count, 1);
  destmem = offset_address (destmem,
			    GEN_INT (-size - prolog_size),
			    1);
  if (issetmem)
    emit_move_insn (destmem, mode_value);
  else
    {
      srcmem = offset_address (srcmem, *count, 1);
      srcmem = offset_address (srcmem,
			       GEN_INT (-size - prolog_size),
			       1);
      emit_move_insn (destmem, srcmem);
    }
  for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
    {
      destmem = offset_address (destmem, modesize, 1);
      if (issetmem)
	emit_move_insn (destmem, mode_value);
      else
	{
	  srcmem = offset_address (srcmem, modesize, 1);
	  emit_move_insn (destmem, srcmem);
	}
    }

  /* Align destination.  */
  if (desired_align > 1 && desired_align > align)
    {
      rtx saveddest = *destptr;

      gcc_assert (desired_align <= size);
      /* Align destptr up, place it to new register.  */
      *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
				      GEN_INT (prolog_size),
				      NULL_RTX, 1, OPTAB_DIRECT);
      *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
				      GEN_INT (-desired_align),
				      *destptr, 1, OPTAB_DIRECT);
      /* See how many bytes we skipped.  */
      saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
				       *destptr,
				       saveddest, 1, OPTAB_DIRECT);
      /* Adjust srcptr and count.  */
      if (!issetmem)
	*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
				       *srcptr, 1, OPTAB_DIRECT);
      *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
				    saveddest, *count, 1, OPTAB_DIRECT);
      /* We copied at most size + prolog_size.  */
      if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
	*min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
      else
	*min_size = 0;

      /* Our loops always round down the block size, but for dispatch to
	 library we need precise value.  */
      if (dynamic_check)
	*count = expand_simple_binop (GET_MODE (*count), AND, *count,
				      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
    }
  else
    {
      gcc_assert (prolog_size == 0);
      /* Decrease count, so we won't end up copying last word twice.  */
      if (!CONST_INT_P (*count))
	*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
				      constm1_rtx, *count, 1, OPTAB_DIRECT);
      else
	*count = GEN_INT ((UINTVAL (*count) - 1)
			  & ~(unsigned HOST_WIDE_INT)(size - 1));
      if (*min_size)
	*min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
    }
}
/* This function is like the previous one, except here we know how many bytes
   need to be copied.  That allows us to update alignment not only of DST, which
   is returned, but also of SRC, which is passed as a pointer for that
   reason.  */
static rtx
expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
					rtx srcreg, rtx value, rtx vec_value,
					int desired_align, int align_bytes,
					bool issetmem)
{
  rtx src = NULL;
  rtx orig_dst = dst;
  rtx orig_src = NULL;
  int piece_size = 1;
  int copied_bytes = 0;

  if (!issetmem)
    {
      gcc_assert (srcp != NULL);
      src = *srcp;
      orig_src = src;
    }

  for (piece_size = 1;
       piece_size <= desired_align && copied_bytes < align_bytes;
       piece_size <<= 1)
    {
      if (align_bytes & piece_size)
	{
	  if (issetmem)
	    {
	      if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
		dst = emit_memset (dst, destreg, vec_value, piece_size);
	      else
		dst = emit_memset (dst, destreg, value, piece_size);
	    }
	  else
	    dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
	  copied_bytes += piece_size;
	}
    }
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);

  if (!issetmem)
    {
      int src_align_bytes = get_mem_align_offset (src, desired_align
						       * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	src_align_bytes = desired_align - src_align_bytes;
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align;
	  for (src_align = desired_align; src_align >= 2; src_align >>= 1)
	    {
	      if ((src_align_bytes & (src_align - 1))
		  == (align_bytes & (src_align - 1)))
		break;
	    }
	  if (src_align > (unsigned int) desired_align)
	    src_align = desired_align;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      if (MEM_SIZE_KNOWN_P (orig_src))
	set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
      *srcp = src;
    }

  return dst;
}
/* Return true if ALG can be used in current context.
   Assume we expand memset if MEMSET is true.  */
static bool
alg_usable_p (enum stringop_alg alg, bool memset)
{
  if (alg == no_stringop)
    return false;
  if (alg == vector_loop)
    return TARGET_SSE || TARGET_AVX;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  if (alg == rep_prefix_1_byte
      || alg == rep_prefix_4_byte
      || alg == rep_prefix_8_byte)
    return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
	     || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
  return true;
}
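/* Example (added comment): compiling with -ffixed-ecx or -ffixed-edi
   makes every rep_prefix_* algorithm unusable here, so decide_alg below
   must fall back to the loop-based expansions or to a library call.  */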
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
            unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
            bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
{
  const struct stringop_algs *algs;
  bool optimize_for_speed;
  int max = -1;
  const struct processor_costs *cost;
  int i;
  bool any_alg_usable_p = false;

  *noalign = false;
  *dynamic_check = -1;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && (max_size < 256
              || (expected_size != -1 && expected_size < 256))))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];

  /* See maximal size for user defined algorithm.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    {
      enum stringop_alg candidate = algs->size[i].alg;
      bool usable = alg_usable_p (candidate, memset);
      any_alg_usable_p |= usable;

      if (candidate != libcall && candidate && usable)
        max = algs->size[i].max;
    }

  /* If expected size is not known but max size is small enough
     so inline version is a win, set expected size into
     the range.  */
  if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size && expected_size == -1)
    expected_size = min_size / 2 + max_size / 2;

  /* If user specified the algorithm, honor it if possible.  */
  if (ix86_stringop_alg != no_stringop
      && alg_usable_p (ix86_stringop_alg, memset))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      *noalign = true;
      if (!count || (count & 3) || (memset && !zero_memset))
        return alg_usable_p (rep_prefix_1_byte, memset)
               ? rep_prefix_1_byte : loop_1_byte;
      else
        return alg_usable_p (rep_prefix_4_byte, memset)
               ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      enum stringop_alg alg = libcall;
      bool alg_noalign = false;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && alg_usable_p (candidate, memset))
                {
                  alg = candidate;
                  alg_noalign = algs->size[i].noalign;
                }
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    {
                      *noalign = alg_noalign;
                      return alg;
                    }
                }
              else if (alg_usable_p (candidate, memset))
                {
                  *noalign = algs->size[i].noalign;
                  return candidate;
                }
            }
        }
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !alg_usable_p (algs->unknown_size, memset)))
    {
      enum stringop_alg alg;

      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max <= 0)
        max = 4096;
      alg = decide_alg (count, max / 2, min_size, max_size, memset,
                        zero_memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return (alg_usable_p (algs->unknown_size, memset)
          ? algs->unknown_size : libcall);
}
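
/* Illustrative sketch (not part of GCC; all names here are hypothetical
   stand-ins for the real stringop_algs structures): the size-table walk
   above in miniature.  Each entry maps a block-size threshold to an
   algorithm; the first entry whose MAX covers the expected size (or is -1,
   meaning "no limit") wins.  */
#if 0
enum demo_alg { DEMO_LOOP_1_BYTE, DEMO_REP_4_BYTE, DEMO_LIBCALL };

struct demo_entry { int max; enum demo_alg alg; };

static enum demo_alg
demo_decide_alg (int expected_size)
{
  /* "Blocks up to 16 bytes: byte loop; up to 4096: rep movl; else libcall."  */
  static const struct demo_entry table[] = {
    { 16, DEMO_LOOP_1_BYTE }, { 4096, DEMO_REP_4_BYTE }, { -1, DEMO_LIBCALL }
  };
  unsigned int i;
  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;
  return DEMO_LIBCALL;
}
#endif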
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size,
                  enum machine_mode move_mode)
{
  int desired_align = 0;

  gcc_assert (alg != no_stringop);

  if (alg == libcall)
    return 0;
  if (move_mode == VOIDmode)
    return 0;

  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying whole cacheline at once.  */
  if (TARGET_PENTIUMPRO
      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
    desired_align = 8;

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;

  return desired_align;
}
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, CONST0_RTX (mode));
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;

  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
         + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_movsi_insv_1 (reg, reg));
        else
          emit_insn (gen_movdi_insv_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
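
/* Illustrative sketch (not part of GCC): the constant-folding path of
   promote_duplicated_reg in plain C.  Broadcasting byte 0xXY to every byte
   of a word is two or three shift+or steps, mirroring the sequence the
   expander emits for register values.  */
#if 0
#include <stdint.h>

static uint64_t
demo_broadcast_byte (uint8_t b, int want_64bit)
{
  uint64_t v = b;
  v |= v << 8;                  /* 0x00XY -> 0xXYXY  */
  v |= v << 16;                 /* 0xXYXY -> 0xXYXYXYXY  */
  if (want_64bit)
    v |= (v << 16) << 16;       /* widen to 0xXYXYXYXYXYXYXYXY  */
  return v;
}
/* demo_broadcast_byte (0xab, 0) == 0xabababab.  */
#endif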
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
   operations when profitable.  The code depends upon architecture, block size
   and alignment, but always has one of the following overall structures:

   Aligned move sequence:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the block
      is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).

   Misaligned move sequence:

   1) Misaligned move prologue/epilogue containing:
      a) Prologue handling small memory blocks and jumping to done_label
         (skipped if blocks are known to be large enough)
      b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
         needed by single possibly misaligned move
         (skipped if alignment is not needed)
      c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves

   2) Zero size guard dispatching to done_label, if needed

   3) Dispatch to library call, if needed

   4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.  */
static bool
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
                           rtx align_exp, rtx expected_align_exp,
                           rtx expected_size_exp, bool issetmem)
{
  rtx destreg;
  rtx srcreg = NULL;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  rtx vec_promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor = 1;
  /* TODO: Once value ranges are available, fill in proper data.  */
  unsigned HOST_WIDE_INT min_size = 0;
  unsigned HOST_WIDE_INT max_size = -1;
  bool misaligned_prologue_used = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (!issetmem
           && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    min_size = max_size = count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */
  alg = decide_alg (count, expected_size, min_size, max_size, issetmem,
                    issetmem && val_exp == const0_rtx,
                    &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  /* For now vector-version of memset is generated only for memory zeroing, as
     creating of promoted vector value is very cheap in this case.  */
  if (issetmem && alg == vector_loop && val_exp != const0_rtx)
    alg = unrolled_loop;

  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
  if (!issetmem)
    srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));

  unroll_factor = 1;
  move_mode = word_mode;
  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = (TARGET_64BIT ? 4 : 2);
      break;
    case vector_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      /* Find the widest supported mode.  */
      move_mode = word_mode;
      while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
             != CODE_FOR_nothing)
        move_mode = GET_MODE_WIDER_MODE (move_mode);

      /* Find the corresponding vector mode with the same size as MOVE_MODE.
         MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
        {
          int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
          move_mode = mode_for_vector (word_mode, nunits);
          if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
            move_mode = word_mode;
        }
      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Misaligned move sequences handle both prologues and epilogues at once.
     Default code generation results in smaller code for large alignments and
     also avoids redundant job when sizes are known precisely.  */
  misaligned_prologue_used = (TARGET_MISALIGNED_MOVE_STRING_PROLOGUES
                              && MAX (desired_align, epilogue_size_needed) <= 32
                              && ((desired_align > align && !align_bytes)
                                  || (!count && epilogue_size_needed > 1)));

  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     front of all code.
     For now the misaligned move sequences do not have fast path
     without broadcasting.  */
  if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
    {
      if (alg == vector_loop)
        {
          gcc_assert (val_exp == const0_rtx);
          vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
          promoted_val = promote_duplicated_reg_to_size (val_exp,
                                                         GET_MODE_SIZE (word_mode),
                                                         desired_align, align);
        }
      else
        promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                       desired_align, align);
    }

  /* Misaligned move sequences handle both prologues and epilogues at once.
     Default code generation results in smaller code for large alignments and
     also avoids redundant job when sizes are known precisely.  */
  if (misaligned_prologue_used)
    {
      /* Misaligned move prologue handled small blocks by itself.  */
      misaligned_prologue_used = true;
      expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
           (dst, src, &destreg, &srcreg,
            move_mode, promoted_val, vec_promoted_val,
            &count_exp,
            &jump_around_label,
            desired_align < align
            ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
            desired_align, align, &min_size, dynamic_check, issetmem);
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
      set_mem_align (dst, desired_align * BITS_PER_UNIT);
      epilogue_size_needed = 0;
      if (need_zero_guard && !min_size)
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (jump_around_label == NULL_RTX)
            jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, jump_around_label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  /* Ensure that alignment prologue won't copy past end of block.  */
  else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
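      /* For instance, with size_needed == 16 and desired_align == align the
         line above computes 1 << (floor_log2 (15) + 1) == 16, so the
         epilogue mask COUNT & (16 - 1) covers every possible remainder
         of the main loop (0..15 bytes).  */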
      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (issetmem && epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else if (min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
          gcc_assert (max_size >= (unsigned HOST_WIDE_INT) epilogue_size_needed);
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (!issetmem && CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          if (issetmem)
            set_storage_via_libcall (dst, count_exp, val_exp, false);
          else
            emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */
  /* Do the expensive promotion once we branched off the small blocks.  */
  if (issetmem && !promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);

  if (desired_align > align && !misaligned_prologue_used)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in prologue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          if (!issetmem)
            src = change_address (src, BLKmode, srcreg);
          dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
                                               promoted_val, vec_promoted_val,
                                               count_exp, align, desired_align,
                                               issetmem);
          /* At most desired_align - align bytes are copied.  */
          if (min_size < (unsigned) (desired_align - align))
            min_size = 0;
          else
            min_size -= desired_align - align;
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
                                                        srcreg,
                                                        promoted_val,
                                                        vec_promoted_val,
                                                        desired_align,
                                                        align_bytes,
                                                        issetmem);

          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
          min_size -= align_bytes;
          max_size -= align_bytes;
        }
      if (need_zero_guard
          && !min_size
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
      if (issetmem)
        promoted_val = val_exp;
    }
  else if (label == NULL_RTX && !misaligned_prologue_used)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size, issetmem);
      break;
    case vector_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
                                     vec_promoted_val, count_exp, move_mode,
                                     unroll_factor, expected_size, issetmem);
      break;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
                                    val_exp, count_exp, move_mode, issetmem);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      if (!issetmem)
        src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                            (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        {
          if (issetmem)
            expand_setmem_epilogue (dst, destreg, promoted_val,
                                    vec_promoted_val, count_exp,
                                    epilogue_size_needed);
          else
            expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                                    epilogue_size_needed);
        }
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
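
/* Illustrative sketch (not part of GCC): the idea behind the misaligned
   prologue/epilogue above, in plain C.  A block of COUNT bytes (COUNT >= N)
   can be covered by a possibly misaligned tail copy of the last N bytes
   plus a main loop over whole N-byte chunks; the two are allowed to
   overlap, so no byte-granular epilogue is needed.  */
#if 0
#include <string.h>
#include <stddef.h>

static void
demo_copy_with_overlap (char *dst, const char *src, size_t count)
{
  enum { N = 8 };               /* chunk size, stands in for SIZE_NEEDED */
  size_t i, main_bytes;

  if (count < N)
    {
      /* Small blocks: byte loop, the "done_label" fast path.  */
      for (i = 0; i < count; i++)
        dst[i] = src[i];
      return;
    }
  /* Tail: last N bytes, may overlap with what the main loop writes.  */
  memcpy (dst + count - N, src + count - N, N);
  /* Main loop: whole chunks only; the remainder is already covered.  */
  main_bytes = count - count % N;
  for (i = 0; i < main_bytes; i += N)
    memcpy (dst + i, src + i, N);
}
#endif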
/* Wrapper for ix86_expand_set_or_movmem for memcpy case.  */
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  return ix86_expand_set_or_movmem (dst, src, count_exp, NULL, align_exp,
                                    expected_align_exp, expected_size_exp, false);
}

/* Wrapper for ix86_expand_set_or_movmem for memset case.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  return ix86_expand_set_or_movmem (dst, NULL, count_exp, val_exp, align_exp,
                                    expected_align_exp, expected_size_exp, true);
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
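  /* For instance, with SCRATCH = 0x61620063 (a zero in byte 1):
       (0x61620063 - 0x01010101) & ~0x61620063 & 0x80808080
         = 0x6060ff62 & 0x9e9dff9c & 0x80808080 = 0x00008000, nonzero,
     while for 0x61626364 (no zero byte) the same expression is 0.  */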
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
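
/* Illustrative sketch (not part of GCC): the word-at-a-time scan generated
   above, written as portable C for a 32-bit little-endian word.  */
#if 0
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static size_t
demo_strlen32 (const char *s)
{
  const char *p = s;
  /* Byte loop until the pointer is 4-byte aligned.  */
  while (((uintptr_t) p & 3) != 0)
    {
      if (*p == 0)
        return (size_t) (p - s);
      p++;
    }
  /* Word loop: (x - 0x01010101) & ~x & 0x80808080 is nonzero iff
     some byte of x is zero.  */
  for (;;)
    {
      uint32_t x;
      memcpy (&x, p, 4);        /* aligned 4-byte load */
      if (((x - 0x01010101u) & ~x & 0x80808080u) != 0)
        break;
      p += 4;
    }
  /* Find the zero byte within the word.  */
  while (*p != 0)
    p++;
  return (size_t) (p - s);
}
#endif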
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  unsigned int const cregs_size
    = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
  rtx vec[3 + cregs_size];
  rtx use = NULL, call;
  unsigned int vec_len = 0;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic
          && (!TARGET_64BIT
              || (ix86_cmodel == CM_LARGE_PIC
                  && DEFAULT_ABI != MS_ABI))
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && !TARGET_PECOFF
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < cregs_size; i++)
        {
          int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
          enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;

          vec[vec_len++]
            = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
        }
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "%!jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "%!rex.W jmp %A0";
      else
        xasm = "%!jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "%!call\t%P0";
  else
    xasm = "%!call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Check whether x86 address PARTS is a pc-relative address.  */

static bool
rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) == LABEL_REF
              || (GET_CODE (symbol) == SYMBOL_REF
                  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
              || (GET_CODE (symbol) == UNSPEC
                  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
                      || XINT (symbol, 1) == UNSPEC_PCREL
                      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
            return true;
        }
    }
  return false;
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && GET_CODE (base) == SUBREG)
    base = SUBREG_REG (base);
  if (index && GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT && !rip_relative_addr_p (&parts))
        len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
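
/* Illustrative sketch (not part of GCC): the special-base rules above for a
   simple register-indirect access, as a standalone table.  Values follow
   the x86-64 ModRM/SIB encoding rules described in the comments; register
   numbers are the hardware encodings.  */
#if 0
static int
demo_indirect_extra_bytes (int base_regno)
{
  /* (%esp)/(%r12) need a SIB byte; (%ebp)/(%r13) need a zero disp8;
     every other base encodes in the single ModRM byte alone.  */
  switch (base_regno)
    {
    case 4:  /* esp/rsp */
    case 12: /* r12 */
    case 5:  /* ebp/rbp */
    case 13: /* r13 */
      return 1;
    default:
      return 0;
    }
}
#endif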
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
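
/* Illustrative sketch (not part of GCC): the short-form test above.  An
   immediate fits the one-byte (sign-extended) encoding exactly when it is
   in [-128, 127] after truncation to the operand mode.  */
#if 0
static int
demo_imm_len (long long ival, int op_bytes)     /* op_bytes: 1, 2, 4 or 8 */
{
  /* Truncate to the operand width first, as trunc_int_for_mode does.  */
  if (op_bytes == 1)
    ival = (signed char) ival;
  else if (op_bytes == 2)
    ival = (short) ival;
  else if (op_bytes == 4)
    ival = (int) ival;
  if (ival >= -128 && ival <= 127)
    return 1;                           /* imm8, sign-extended by the CPU */
  return op_bytes == 8 ? 4 : op_bytes;  /* DImode immediates are imm32 */
}
#endif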
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0), false);
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */
int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_NOCONA:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_GENERIC:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
      return 3;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_COREI7_AVX:
    case PROCESSOR_HASWELL:
      return 4;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
/* Helper function for exact_store_load_dependency.
   Return true if addr is found in insn.  */
static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    case REG:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case CC0:
    case EXPR_LIST:
      return false;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}

/* Return true if there exists exact dependency for store & load, i.e.
   the same memory address is used in them.  */
static bool
exact_store_load_dependency (rtx store, rtx load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_GENERIC:
      memory = get_attr_memory (insn);

      /* The stack engine allows push&pop instructions to execute in
         parallel.  */
      if (((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
          && (ix86_tune != PROCESSOR_ATHLON && ix86_tune != PROCESSOR_K8))
        return 0;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_COREI7_AVX:
    case PROCESSOR_HASWELL:
      memory = get_attr_memory (insn);

      /* The stack engine allows push&pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          if (cost >= 4)
            cost -= 4;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SLM:
      if (!reload_completed)
        return cost;

      /* Increase cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase cost of ld/st for short int types only
                     because of store forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase cost of store/load insn if exact
                         dependence exists and it is load insn.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_COREI7_AVX:
    case PROCESSOR_HASWELL:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as many instructions can be executed on a cycle, i.e.,
         issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      if (reload_completed)
        return ix86_issue_rate ();
      /* Don't use lookahead for pre-reload schedule to save compile time.  */
      return 0;

    default:
      return 0;
    }
}
/* Return true if target platform supports macro-fusion.  */

static bool
ix86_macro_fusion_p ()
{
  return TARGET_FUSE_CMP_AND_BRANCH;
}

/* Check whether current microarchitecture supports macro fusion
   for insn pair "CONDGEN + CONDJMP".  Refer to
   "Intel Architectures Optimization Reference Manual".  */

static bool
ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
{
  rtx src, dest;
  rtx single_set = single_set (condgen);
  enum rtx_code ccode;
  rtx compare_set = NULL_RTX, test_if, cond;
  rtx alu_set = NULL_RTX, addr = NULL_RTX;

  if (get_attr_type (condgen) != TYPE_TEST
      && get_attr_type (condgen) != TYPE_ICMP
      && get_attr_type (condgen) != TYPE_INCDEC
      && get_attr_type (condgen) != TYPE_ALU)
    return false;

  if (single_set == NULL_RTX
      && !TARGET_FUSE_ALU_AND_BRANCH)
    return false;

  if (single_set != NULL_RTX)
    compare_set = single_set;
  else
    {
      int i;
      rtx pat = PATTERN (condgen);
      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
          {
            rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
            if (GET_CODE (set_src) == COMPARE)
              compare_set = XVECEXP (pat, 0, i);
            else
              alu_set = XVECEXP (pat, 0, i);
          }
    }
  if (compare_set == NULL_RTX)
    return false;
  src = SET_SRC (compare_set);
  if (GET_CODE (src) != COMPARE)
    return false;

  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     supported.  */
  if ((MEM_P (XEXP (src, 0))
       && CONST_INT_P (XEXP (src, 1)))
      || (MEM_P (XEXP (src, 1))
          && CONST_INT_P (XEXP (src, 0))))
    return false;

  /* No fusion for RIP-relative address.  */
  if (MEM_P (XEXP (src, 0)))
    addr = XEXP (XEXP (src, 0), 0);
  else if (MEM_P (XEXP (src, 1)))
    addr = XEXP (XEXP (src, 1), 0);

  if (addr)
    {
      ix86_address parts;
      int ok = ix86_decompose_address (addr, &parts);
      gcc_assert (ok);

      if (rip_relative_addr_p (&parts))
        return false;
    }

  test_if = SET_SRC (pc_set (condjmp));
  cond = XEXP (test_if, 0);
  ccode = GET_CODE (cond);
  /* Check whether conditional jump use Sign or Overflow Flags.  */
  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
      && (ccode == GE
          || ccode == GT
          || ccode == LE
          || ccode == LT))
    return false;

  /* Return true for TYPE_TEST and TYPE_ICMP.  */
  if (get_attr_type (condgen) == TYPE_TEST
      || get_attr_type (condgen) == TYPE_ICMP)
    return true;

  /* The following handles macro-fusion for alu + jmp.  */
  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
    return false;

  /* No fusion for alu op with memory destination operand.  */
  dest = SET_DEST (alu_set);
  if (MEM_P (dest))
    return false;

  /* Macro-fusion for inc/dec + unsigned conditional jump is not
     supported.  */
  if (get_attr_type (condgen) == TYPE_INCDEC
      && (ccode == GEU
          || ccode == GTU
          || ccode == LEU
          || ccode == LTU))
    return false;

  return true;
}
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) IMUL instruction is on the top of list;
   (2) There exists the only producer of independent IMUL instruction in
       ready list.
   Return index of IMUL producer if it was found and -1 otherwise.  */
static int
do_reorder_for_imul (rtx *ready, int n_ready)
{
  rtx insn, set, insn1, insn2;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;
  int i;

  if (ix86_tune != PROCESSOR_ATOM)
    return index;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  set = single_set (insn);
  if (!set)
    return index;
  if (!(GET_CODE (SET_SRC (set)) == MULT
        && GET_MODE (SET_SRC (set)) == SImode))
    return index;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
        continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
        insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
          && GET_CODE (SET_SRC (insn2)) == MULT
          && GET_MODE (SET_SRC (insn2)) == SImode)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
        {
          rtx con;
          con = DEP_CON (dep);
          if (!NONDEBUG_INSN_P (con))
            continue;
          insn1 = PATTERN (con);
          if (GET_CODE (insn1) == PARALLEL)
            insn1 = XVECEXP (insn1, 0, 0);

          if (GET_CODE (insn1) == SET
              && GET_CODE (SET_SRC (insn1)) == MULT
              && GET_MODE (SET_SRC (insn1)) == SImode)
            {
              sd_iterator_def sd_it1;
              dep_t dep1;
              /* Check if there is no other dependee for IMUL.  */
              index = i;
              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
                {
                  rtx pro;
                  pro = DEP_PRO (dep1);
                  if (!NONDEBUG_INSN_P (pro))
                    continue;
                  if (pro != insn)
                    index = -1;
                }
              if (index >= 0)
                break;
            }
        }
      if (index >= 0)
        break;
    }
  return index;
}
/* Try to find the best candidate on the top of the ready list if two insns
   have the same priority - the candidate is best if its dependees were
   scheduled earlier.  Applied for Silvermont only.
   Return true if the top 2 insns must be interchanged.  */
static bool
swap_top_of_ready_list (rtx *ready, int n_ready)
{
  rtx top = ready[n_ready - 1];
  rtx next = ready[n_ready - 2];
  rtx set;
  sd_iterator_def sd_it;
  dep_t dep;
  int clock1 = -1;
  int clock2 = -1;
  #define INSN_TICK(INSN) (HID (INSN)->tick)

  if (ix86_tune != PROCESSOR_SLM)
    return false;

  if (!NONDEBUG_INSN_P (top))
    return false;
  if (!NONJUMP_INSN_P (top))
    return false;
  if (!NONDEBUG_INSN_P (next))
    return false;
  if (!NONJUMP_INSN_P (next))
    return false;
  set = single_set (top);
  if (!set)
    return false;
  set = single_set (next);
  if (!set)
    return false;

  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
    {
      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
	return false;
      /* Determine the winner more precisely.  */
      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
	{
	  rtx pro;
	  pro = DEP_PRO (dep);
	  if (!NONDEBUG_INSN_P (pro))
	    continue;
	  if (INSN_TICK (pro) > clock1)
	    clock1 = INSN_TICK (pro);
	}
      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
	{
	  rtx pro;
	  pro = DEP_PRO (dep);
	  if (!NONDEBUG_INSN_P (pro))
	    continue;
	  if (INSN_TICK (pro) > clock2)
	    clock2 = INSN_TICK (pro);
	}

      if (clock1 == clock2)
	{
	  /* Determine the winner - a load must win.  */
	  enum attr_memory memory1, memory2;
	  memory1 = get_attr_memory (top);
	  memory2 = get_attr_memory (next);
	  if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
	    return true;
	}
      return (bool) (clock2 < clock1);
    }
  return false;
}
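
/* Note on the tie-break above: when the latest producers of TOP and NEXT
   became ready on the same tick, the load is preferred on top, presumably
   so that its memory latency starts being hidden as early as possible.  */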
/* Perform possible reordering of the ready list for Atom/Silvermont only.
   Return the issue rate.  */
static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var)
{
  int issue_rate = -1;
  int n_ready = *pn_ready;
  int index;
  rtx insn;
  int i;

  /* Set up the issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for Atom/SLM only.  */
  if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
    return issue_rate;

  /* Nothing to do if the ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Do reordering for the post-reload scheduler only.  */
  if (!reload_completed)
    return issue_rate;

  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
    {
      if (sched_verbose > 1)
	fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
		 INSN_UID (ready[index]));

      /* Put the IMUL producer (ready[index]) at the top of the ready list.  */
      insn = ready[index];
      for (i = index; i < n_ready - 1; i++)
	ready[i] = ready[i + 1];
      ready[n_ready - 1] = insn;
      return issue_rate;
    }

  if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
    {
      if (sched_verbose > 1)
	fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
		 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
      /* Swap the 2 top elements of the ready list.  */
      insn = ready[n_ready - 1];
      ready[n_ready - 1] = ready[n_ready - 2];
      ready[n_ready - 2] = insn;
    }
  return issue_rate;
}
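
/* ix86_sched_reorder is installed as the TARGET_SCHED_REORDER hook (see
   the target macro definitions near the end of this file), so the above
   runs each time the haifa scheduler is about to pick the next insn from
   the ready list.  */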
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the LHS of INSN is a HW function argument register; set
   *IS_SPILLED to true if it is a likely-spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable; ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely-spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely-spilled HW register.  Return the
   first argument if at least one dependence was added, or NULL otherwise.  */
static rtx
add_parameter_dependencies (rtx call, rtx head)
{
  rtx insn;
  rtx last = call;
  rtx first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find the argument-passing instruction nearest to the call.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add an output dependence between two function arguments if the
	     chain of output arguments contains likely-spilled HW
	     registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!first_arg || !is_spilled)
    return NULL;
  return first_arg;
}
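
/* Example (a sketch, using the 64-bit SysV integer argument registers):
   for

       movl	$1, %edi
       movl	$2, %esi
       call	foo

   the two moves form the argument chain; since %edi/%esi are
   likely-spilled hard registers, output dependencies are added so the
   chain is kept together in front of the call.  */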
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
   code motion.  */
static void
avoid_func_arg_motion (rtx first_arg, rtx insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add an output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add an anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
static void
add_dependee_for_func_arg (rtx arg, basic_block bb)
{
  rtx insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for the pre-reload schedule - avoid motion of function arguments
   passed in likely-spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn;
  rtx first_arg = NULL;

  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add a dependee for the first argument to predecessors, but
	       only if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of the region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Assume that the region is an SCC, i.e. all immediate
		   predecessors of a non-head block are in the same
		   region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating loop-carried dependencies by using
		       the topological ordering of the region.  */
		    if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
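
/* Note: the BLOCK_TO_BB comparison above only adds the extra dependee on
   edges coming from blocks that are earlier in the region's topological
   order, so no loop-carried (back-edge) dependency can ever be created.  */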
/* Hook for the pre-reload schedule - set the priority of moves from
   likely-spilled HW registers to the maximum, to schedule them as soon as
   possible.  These are moves from function argument registers at the top
   of the function entry and moves from function return value registers
   after a call.  */
static int
ix86_adjust_priority (rtx insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
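
/* Example: at function entry a copy like

       movl	%edi, %ebx

   has a likely-spilled hard register (%edi) as its source, so the hook
   above raises it to sched_max_insns_priority; scheduling it first
   shortens the live range of the argument register.  */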
/* Model the decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions the decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
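
/* Worked example of the decoder bookkeeping below: with a 16-byte ifetch
   block and a 6-insn limit, three 5-byte insns consume 15 bytes, so a
   following 2-byte insn no longer fits (15 + 2 > 16) and must wait for
   the next block even though only 3 of the 6 decode slots were used.  */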
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    bitmap_set_bit (data->ready_try_change, n_ready);
	}
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on the current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in the current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  bitmap_clear (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on the current cycle due to decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
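
/* The issue/backtrack pair above implements a cheap undo: _issue records
   every insn it masks out in the ready_try_change bitmap, and _backtrack
   clears exactly those ready_try entries again, so no full recomputation
   of the filtered list is needed when max_issue abandons a branch.  */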
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for the scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for the current CPU.  Some of these hooks are
     used in time-critical parts of the scheduler, so we only set them up
     when they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_COREI7_AVX:
    case PROCESSOR_HASWELL:
      /* Do not perform multipass scheduling for the pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  targetm.sched.dfa_post_advance_cycle
	    = core2i7_dfa_post_advance_cycle;
	  targetm.sched.first_cycle_multipass_init
	    = core2i7_first_cycle_multipass_init;
	  targetm.sched.first_cycle_multipass_begin
	    = core2i7_first_cycle_multipass_begin;
	  targetm.sched.first_cycle_multipass_issue
	    = core2i7_first_cycle_multipass_issue;
	  targetm.sched.first_cycle_multipass_backtrack
	    = core2i7_first_cycle_multipass_backtrack;
	  targetm.sched.first_cycle_multipass_end
	    = core2i7_first_cycle_multipass_end;
	  targetm.sched.first_cycle_multipass_fini
	    = core2i7_first_cycle_multipass_fini;

	  /* Set decoder parameters.  */
	  core2i7_secondary_decoder_max_insn_size = 8;
	  core2i7_ifetch_block_size = 16;
	  core2i7_ifetch_block_max_insns = 6;
	  break;
	}
      /* ... Fall through ...  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
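
/* Only the post-reload pass on Core 2 and its successors pays for the
   decoder model; for all other tunings (and for the pre-reload pass) the
   multipass hooks stay NULL and the generic scheduling path is used.  */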
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
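
/* Example: a double constant placed in memory is given 64-bit alignment
   by the function above even though ALIGN may come in as 32 on ia32, and
   string constants of 31+ characters are raised to word alignment to
   help block operations.  */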
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align, bool opt)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local
     or global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least
     16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.
     The rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being
     compiled, and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array
     where we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when the variable is
     not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     For ABI compatibility we use EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
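
/* Summary of the static chain conventions implemented above:

     64-bit			%r10
     32-bit default		%ecx
     fastcall / thiscall	%eax
     regparm(3)			stack slot; %esi via the alternate
				entry point  */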
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load the address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but the kernel does not use trampolines at
	 the moment.  FNADDR is a 32-bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load the static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute the offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
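
/* Byte layout of the 64-bit trampoline emitted above (a sketch; the
   movabs variant is shown, immediates little-endian):

     49 bb <8-byte fnaddr>     movabs $fnaddr, %r11
     49 ba <8-byte chain>      movabs $chain,  %r10
     49 ff e3                  jmp    *%r11
     90                        nop (pads the final 32-bit store)

   The 0xbb41/0xba41 opcode pairs used when a zero-extended 32-bit
   address suffices encode the shorter movl forms into %r11d/%r10d.  */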
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
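
/* Both this table and the function-type table below are memoized: the
   first request for a type code builds the tree (vector types from their
   element type and mode, pointer types from their pointee) and caches it,
   so every later builtin registration reuses the same TREE node.  */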
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  IX86_BUILTIN_FXSAVE,
  IX86_BUILTIN_FXRSTOR,
  IX86_BUILTIN_FXSAVE64,
  IX86_BUILTIN_FXRSTOR64,

  IX86_BUILTIN_XSAVE,
  IX86_BUILTIN_XRSTOR,
  IX86_BUILTIN_XSAVE64,
  IX86_BUILTIN_XRSTOR64,

  IX86_BUILTIN_XSAVEOPT,
  IX86_BUILTIN_XSAVEOPT64,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4-element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32
,
27690 IX86_BUILTIN_BEXTR64
,
27693 /* TBM instructions. */
27694 IX86_BUILTIN_BEXTRI32
,
27695 IX86_BUILTIN_BEXTRI64
,
27697 /* BMI2 instructions. */
27698 IX86_BUILTIN_BZHI32
,
27699 IX86_BUILTIN_BZHI64
,
27700 IX86_BUILTIN_PDEP32
,
27701 IX86_BUILTIN_PDEP64
,
27702 IX86_BUILTIN_PEXT32
,
27703 IX86_BUILTIN_PEXT64
,
27705 /* ADX instructions. */
27706 IX86_BUILTIN_ADDCARRYX32
,
27707 IX86_BUILTIN_ADDCARRYX64
,
27709 /* FSGSBASE instructions. */
27710 IX86_BUILTIN_RDFSBASE32
,
27711 IX86_BUILTIN_RDFSBASE64
,
27712 IX86_BUILTIN_RDGSBASE32
,
27713 IX86_BUILTIN_RDGSBASE64
,
27714 IX86_BUILTIN_WRFSBASE32
,
27715 IX86_BUILTIN_WRFSBASE64
,
27716 IX86_BUILTIN_WRGSBASE32
,
27717 IX86_BUILTIN_WRGSBASE64
,
27719 /* RDRND instructions. */
27720 IX86_BUILTIN_RDRAND16_STEP
,
27721 IX86_BUILTIN_RDRAND32_STEP
,
27722 IX86_BUILTIN_RDRAND64_STEP
,
27724 /* RDSEED instructions. */
27725 IX86_BUILTIN_RDSEED16_STEP
,
27726 IX86_BUILTIN_RDSEED32_STEP
,
27727 IX86_BUILTIN_RDSEED64_STEP
,
27729 /* F16C instructions. */
27730 IX86_BUILTIN_CVTPH2PS
,
27731 IX86_BUILTIN_CVTPH2PS256
,
27732 IX86_BUILTIN_CVTPS2PH
,
27733 IX86_BUILTIN_CVTPS2PH256
,
27735 /* CFString built-in for darwin */
27736 IX86_BUILTIN_CFSTRING
,
27738 /* Builtins to get CPU type and supported features. */
27739 IX86_BUILTIN_CPU_INIT
,
27740 IX86_BUILTIN_CPU_IS
,
27741 IX86_BUILTIN_CPU_SUPPORTS
,
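
/* The IX86_BUILTIN_CPU_* codes above back the user-visible
   __builtin_cpu_init, __builtin_cpu_is and __builtin_cpu_supports
   builtins.  Illustrative use from user code (a sketch; the called
   function is hypothetical):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("sse4.2"))
       use_sse42_code_path ();  */
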
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
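
/* Deferral sketch, based on the functions below: def_builtin parks a
   builtin whose ISA is not currently enabled by recording only its
   name, tcode and set_and_not_built_p = true in this array, and
   ix86_add_new_builtins later turns such parked entries into real
   decls once the ISA is switched on.  */
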
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl,
   or NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
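
/* Typical call, as a sketch (the builtin name and enum code here are
   hypothetical; real registrations follow this shape in the init
   routines):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   With -msse2 in effect the decl is built immediately; otherwise the
   request is parked in ix86_builtins_isa until the ISA is enabled.  */
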
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
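
/* Sketch of the expected call site: once option handling enables an
   extra ISA, e.g. for a target ("...") function attribute, it can run

     ix86_add_new_builtins (ix86_isa_flags);

   to materialize every deferred builtin the updated flags allow.  */
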
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
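
/* Reading an entry: the first bdesc_comi element below says that when
   SSE is enabled, "__builtin_ia32_comieq" (IX86_BUILTIN_COMIEQSS)
   expands through insn pattern CODE_FOR_sse_comi as an UNEQ
   comparison, with no flag bits set.  */
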
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* FXSR, XSAVE and XSAVEOPT */
  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },

  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28337 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28338 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28340 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28341 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28342 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28343 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28344 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28345 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28346 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28347 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28349 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28350 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
28352 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28353 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28354 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28355 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28357 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28358 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28360 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28361 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28362 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28363 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28364 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28365 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28367 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28368 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28369 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28370 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28372 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28373 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28374 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28375 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28376 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28377 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28378 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28379 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28381 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28382 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28383 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28385 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28386 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
28388 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
28389 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28391 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
28393 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
28394 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
28395 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
28396 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
28398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28401 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28403 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28404 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28407 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28408 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28411 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28412 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
28420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
28425 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
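
  /* SSE2 MMX */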
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
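
  /* SSE3 */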
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
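
  /* SSSE3 */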
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
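
  /* SSE4.1 */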
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
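
  /* SSE4.1 rounding and ptest builtins, gated by OPTION_MASK_ISA_ROUND.  */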
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
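
  /* SSE4.2 */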
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
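
  /* SSE4A */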
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
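
  /* AES */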
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
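
  /* PCLMUL */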
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
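
  /* AVX */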
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
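
  /* AVX2 */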
28717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
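
  /* Usage sketch (illustrative, not part of the table): with -mavx2 the
     per-element variable-shift builtins above back the <immintrin.h>
     wrappers, e.g. _mm256_sllv_epi32 is built on __builtin_ia32_psllv8si
     and shifts each 32-bit lane by its own count:

         #include <immintrin.h>

         __m256i shift_each_lane (__m256i v, __m256i counts)
         {
           return _mm256_sllv_epi32 (v, counts);
         }
  */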
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
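
  /* Usage sketch for the F16C entries above: <immintrin.h> wraps them as
     _mm_cvtps_ph and _mm_cvtph_ps; the immediate selects the rounding mode
     (0 is round-to-nearest):

         #include <immintrin.h>

         __m128 roundtrip_half (__m128 x)
         {
           __m128i h = _mm_cvtps_ph (x, 0);
           return _mm_cvtph_ps (h);
         }
  */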
  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
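
/* Usage sketch for the BMI2 bit-manipulation entries above: with -mbmi2
   these are reachable via _pdep_u32/_pext_u32 from <immintrin.h>.  PDEP
   scatters the low bits of the source into the positions selected by the
   mask; PEXT performs the inverse gather:

       #include <immintrin.h>

       unsigned spread_to_nibbles (unsigned bits)
       {
         return _pdep_u32 (bits, 0x11111111);
       }
*/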
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I  V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I  V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF        V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF        V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2       V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2       V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI        V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI        V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI     V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI        V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI     V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI        V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2       V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2       V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2       V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2       V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF        V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF        V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI        V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI        V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI        V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI        V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM    V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM    V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM    V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM    V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP    V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP    V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP    V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP    V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF     V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF     V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF     V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF     V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF     V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF     V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF        V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF        V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2       V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2       V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI        V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI        V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI        V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI        V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI     V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI     V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI     V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI     V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI     V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI     V8HI_FTYPE_V16QI
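
/* Decoding the MULTI_ARG_* names: the digit gives the operand count, the
   mode letters (QI/HI/SI/DI/SF/DF) give the element type, and a trailing 2
   marks the 256-bit variant.  For example, MULTI_ARG_3_SF2 expands to
   V8SF_FTYPE_V8SF_V8SF_V8SF, i.e. an 8 x float result computed from three
   8 x float operands.  */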
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
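
/* Usage sketch for the multi-arg builtins above: with -mfma4 the FMA4
   entries back the <x86intrin.h> wrappers, e.g. _mm_macc_ps is built on
   __builtin_ia32_vfmaddps, and with -mxop _mm_comlt_epu8 is built on
   __builtin_ia32_vpcomltub:

       #include <x86intrin.h>

       __m128 mul_add (__m128 a, __m128 b, __m128 c)
       {
         return _mm_macc_ps (a, b, c);
       }
*/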
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }
          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);
          /* add_builtin_function () will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
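
/* Note on the registration above: passing d->name + strlen ("__builtin_")
   as the asm name means each TM builtin is also callable under its bare
   libitm name, e.g. __builtin__ITM_WM64 can be invoked directly as
   _ITM_WM64.  Roughly, user code or the trans-mem lowering may then emit
   a call such as (sketch, assuming an SSE target and suitable types):

       _ITM_WM128 (dst, value);   writes a 16-byte vector transactionally
*/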
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
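
  /* Usage sketch for the RDRND builtins just defined: each *_step builtin
     writes a hardware random number through its pointer argument and
     returns nonzero on success, so callers typically retry:

         unsigned int get_random_u32 (void)
         {
           unsigned int r;
           while (!__builtin_ia32_rdrand32_step (&r))
             continue;
           return r;
         }
  */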
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
29503 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
29504 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
29506 /* MMX access to the vec_init patterns. */
29507 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
29508 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
29510 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
29511 V4HI_FTYPE_HI_HI_HI_HI
,
29512 IX86_BUILTIN_VEC_INIT_V4HI
);
29514 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
29515 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
29516 IX86_BUILTIN_VEC_INIT_V8QI
);
29518 /* Access to the vec_extract patterns. */
29519 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
29520 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
29521 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
29522 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
29523 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
29524 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
29525 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
29526 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
29527 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
29528 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
29530 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29531 "__builtin_ia32_vec_ext_v4hi",
29532 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
29534 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
29535 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
29537 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
29538 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
29540 /* Access to the vec_set patterns. */
29541 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
29542 "__builtin_ia32_vec_set_v2di",
29543 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
29545 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
29546 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
29548 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
29549 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
29551 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
29552 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
29554 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29555 "__builtin_ia32_vec_set_v4hi",
29556 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
29558 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
29559 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
29562 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
29563 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
29564 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
29565 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
29566 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
29567 "__builtin_ia32_rdseed_di_step",
29568 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
29571 def_builtin (0, "__builtin_ia32_addcarryx_u32",
29572 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29573 def_builtin (OPTION_MASK_ISA_64BIT
,
29574 "__builtin_ia32_addcarryx_u64",
29575 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29576 IX86_BUILTIN_ADDCARRYX64
);
29578 /* Add FMA4 multi-arg argument instructions */
29579 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29584 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29585 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
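
/* Usage sketch, not part of this file: the RDRND step builtins defined
   above return nonzero on success and store the random value through the
   pointer argument (compile with -mrdrnd; the retry bound below is an
   arbitrary choice for the sketch).  */
#if 0
int
get_random_u32 (unsigned int *value)
{
  int tries;
  for (tries = 0; tries < 10; tries++)
    if (__builtin_ia32_rdrand32_step (value))
      return 1;	/* the DRNG delivered a value */
  return 0;	/* persistent underflow */
}
#endif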
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR, 0);

  pop_cfun ();

  return bb3;
}
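
/* Illustrative C analogue of one condition block chained together by
   add_condition_to_bb above; the names are hypothetical and the MIN_EXPR
   trick appears as the ?: below (the result is zero if any predicate is
   zero).  Kept under #if 0 since it is a sketch, not part of the build.  */
#if 0
static int do_work_sse42 (void) { return 42; }

static void *
one_condition_block (void)
{
  int c1 = __builtin_cpu_is ("corei7");
  int c2 = __builtin_cpu_supports ("sse4.2");
  int all = c1 < c2 ? c1 : c2;	    /* and_expr_var via MIN_EXPR */
  if (all > 0)
    return (void *) do_work_sse42;  /* CONVERT_EXPR of &VERSION_DECL */
  return 0;			    /* real code falls through to bb3 */
}
#endif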
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_a,
    P_PROC_SSE4_a,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_AVX2,
    P_FMA,
    P_PROC_FMA
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */

  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"mmx", P_MMX},
      {"sse", P_SSE},
      {"sse2", P_SSE2},
      {"sse3", P_SSE3},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"avx", P_AVX},
      {"avx2", P_AVX2}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_COREI7:
	      arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_COREI7_AVX:
	      arg_str = "corei7-avx";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_ATOM:
	      arg_str = "atom";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_a;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    default:
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
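
/* How these attribute strings reach this parser, as a user writes them
   (function multiversioning is accepted by the C++ front end at this
   point; "greet" is an illustrative name).  The "default" version gets
   priority zero; "arch=" dispatches on __builtin_cpu_is, a feature list
   on __builtin_cpu_supports.  */
#if 0
__attribute__ ((target ("default")))
int greet (void) { return 0; }

__attribute__ ((target ("arch=corei7")))
int greet (void) { return 1; }

__attribute__ ((target ("sse4.2,popcnt")))
int greet (void) { return 2; }
#endif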
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}

/* ARGLIST is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.  It also
   replaces non-identifier characters "=,-" with "_".  */

static char *
sorted_attr_string (tree arglist)
{
  tree arg;
  size_t str_len_sum = 0;
  char **args = NULL;
  char *attr_str, *ret_str;
  char *attr = NULL;
  unsigned int argnum = 1;
  unsigned int i;

  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      str_len_sum += len + 1;
      if (arg != arglist)
	argnum++;
      for (i = 0; i < strlen (str); i++)
	if (str[i] == ',')
	  argnum++;
    }

  attr_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      memcpy (attr_str + str_len_sum, str, len);
      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
      str_len_sum += len + 1;
    }

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i]== '-')
      attr_str[i] = '_';

  if (argnum == 1)
    return attr_str;

  args = XNEWVEC (char *, argnum);

  i = 0;
  attr = strtok (attr_str, ",");
  while (attr != NULL)
    {
      args[i] = attr;
      i++;
      attr = strtok (NULL, ",");
    }

  qsort (args, argnum, sizeof (char *), attr_strcmp);

  ret_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (i = 0; i < argnum; i++)
    {
      size_t len = strlen (args[i]);
      memcpy (ret_str + str_len_sum, args[i], len);
      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
      str_len_sum += len + 1;
    }

  XDELETEVEC (args);
  XDELETEVEC (attr_str);
  return ret_str;
}
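
/* Standalone demo of the tokenize-sort-join scheme above, using plain
   libc instead of GCC's tree/XNEWVEC machinery (so the names differ):
   "sse4.2,arch=corei7" becomes "arch_corei7_sse4.2".  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
cmp (const void *a, const void *b)
{
  return strcmp (*(char *const *) a, *(char *const *) b);
}

int
main (void)
{
  char buf[] = "sse4.2,arch=corei7";
  char *tok[8], *p;
  unsigned int n = 0, i;

  for (i = 0; buf[i]; i++)	/* '=' and '-' become '_' */
    if (buf[i] == '=' || buf[i] == '-')
      buf[i] = '_';

  for (p = strtok (buf, ","); p; p = strtok (NULL, ","))
    tok[n++] = p;

  qsort (tok, n, sizeof (char *), cmp);

  for (i = 0; i < n; i++)	/* joined with '_' */
    printf ("%s%s", tok[i], i + 1 < n ? "_" : "\n");
  return 0;
}
#endif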
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "Function versions cannot be marked as gnu_inline,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
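
/* Effect of the mangling above, for reference: a version declared as
   below ends up with assembler name "greet.arch_corei7" (the sorted
   attribute string joined with '.', which is demangler friendly),
   while the "default" version keeps its plain name.  Illustrative
   declaration only.  */
#if 0
__attribute__ ((target ("arch=corei7")))
int greet (void) { return 1; }
#endif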
/* This function returns true if FN1 and FN2 are versions of the same function,
   that is, the target strings of the function decls are different.  This assumes
   that FN1 and FN2 have the same signature.  */

static bool
ix86_function_versions (tree fn1, tree fn2)
{
  tree attr1, attr2;
  char *target1, *target2;
  bool result;

  if (TREE_CODE (fn1) != FUNCTION_DECL
      || TREE_CODE (fn2) != FUNCTION_DECL)
    return false;

  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));

  /* At least one function decl should have the target attribute specified.  */
  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
    return false;

  /* Diagnose missing target attribute if one of the decls is already
     multi-versioned.  */
  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
    {
      if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
	{
	  if (attr2 != NULL_TREE)
	    {
	      tree tem = fn1;
	      fn1 = fn2;
	      fn2 = tem;
	      attr1 = attr2;
	    }
	  error_at (DECL_SOURCE_LOCATION (fn2),
		    "missing %<target%> attribute for multi-versioned %D",
		    fn2);
	  inform (DECL_SOURCE_LOCATION (fn1),
		  "previous declaration of %D", fn1);
	  /* Prevent diagnosing of the same error multiple times.  */
	  DECL_ATTRIBUTES (fn2)
	    = tree_cons (get_identifier ("target"),
			 copy_node (TREE_VALUE (attr1)),
			 DECL_ATTRIBUTES (fn2));
	}
      return false;
    }

  target1 = sorted_attr_string (TREE_VALUE (attr1));
  target2 = sorted_attr_string (TREE_VALUE (attr2));

  /* The sorted target strings must be different for fn1 and fn2
     to be versions.  */
  if (strcmp (target1, target2) == 0)
    result = false;
  else
    result = true;

  XDELETEVEC (target1);
  XDELETEVEC (target2);

  return result;
}
static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Return a new name by appending SUFFIX to the DECL name.  If make_unique
   is true, append the full path name of the source file.  */

static char *
make_name (tree decl, const char *suffix, bool make_unique)
{
  char *global_var_name;
  int name_len;
  const char *name;
  const char *unique_name = NULL;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Get a unique name that can be used globally without any chances
     of collision at link time.  */
  if (make_unique)
    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));

  name_len = strlen (name) + strlen (suffix) + 2;

  if (make_unique)
    name_len += strlen (unique_name) + 1;
  global_var_name = XNEWVEC (char, name_len);

  /* Use '.' to concatenate names as it is demangler friendly.  */
  if (make_unique)
    snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
	      suffix);
  else
    snprintf (global_var_name, name_len, "%s.%s", name, suffix);

  return global_var_name;
}
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif

/* Returns true if decl is multi-versioned and DECL is the default function,
   that is it is not tagged with target specific optimization.  */

static bool
is_function_default_version (const tree decl)
{
  if (TREE_CODE (decl) != FUNCTION_DECL
      || !DECL_FUNCTION_VERSIONED (decl))
    return false;
  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attr);
  attr = TREE_VALUE (TREE_VALUE (attr));
  return (TREE_CODE (attr) == STRING_CST
	  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
}
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_get_node (fn);
  gcc_assert (node != NULL);

  node_v = get_cgraph_node_version (node);
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_get_create_node (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= insert_new_cgraph_node_version (dispatcher_node);
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs ifunc which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
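
/* A hand-written analogue of the ifunc dispatch set up above, in its
   plain GNU C form (illustrative names; needs assembler and loader
   support for ifunc).  The generated resolver does the same job, with
   __builtin_cpu_is / __builtin_cpu_supports as predicates.  */
#if 0
static int impl_generic (void) { return 0; }
static int impl_avx2 (void) { return 1; }

static int (*resolve_entry (void)) (void)
{
  __builtin_cpu_init ();
  return __builtin_cpu_supports ("avx2") ? impl_avx2 : impl_generic;
}

int entry (void) __attribute__ ((ifunc ("resolve_entry")));
#endif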
/* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */

static tree
make_attribute (const char *name, const char *arg_name, tree chain)
{
  tree attr_name;
  tree attr_arg_name;
  tree attr_args;
  tree attr;

  attr_name = get_identifier (name);
  attr_arg_name = build_string (strlen (arg_name), arg_name);
  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
  attr = tree_cons (attr_name, attr_args, chain);
  return attr;
}
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree dispatch_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;
  bool is_uniq = false;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    is_uniq = true;

  /* Append the filename to the resolver function if the versions are
     not externally visible.  This is because the resolver function has
     to be externally visible for the loader to find it.  So, appending
     the filename will prevent conflicts with a resolver function from
     another module which is based on the same version name.  */
  resolver_name = make_name (default_decl, "resolver", is_uniq);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  /* IFUNC resolvers have to be externally visible.  */
  TREE_PUBLIC (decl) = 1;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }

  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false);

  cgraph_add_new_function (decl, true);
  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));

  pop_cfun ();

  gcc_assert (dispatch_decl != NULL);
  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  /* Create the alias for dispatch to resolver here.  */
  /*cgraph_create_function_alias (dispatch_decl, decl);*/
  cgraph_same_body_alias (NULL, dispatch_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  vec<tree> fn_ver_vec = vNULL;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = get_cgraph_node_version (node);
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  fn_ver_vec.create (2);

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  fn_ver_vec.release ();
  rebuild_cgraph_edges ();
  pop_cfun ();
  return resolver_decl;
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  unsigned int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
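
/* For reference, the C-level layout this tree type mirrors, as declared
   (modulo spelling here) in libgcc/config/i386/cpuinfo.c.  Note the field
   chain above is built in reverse; finish_builtin_struct restores the
   source order shown below.  */
#if 0
struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};
#endif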
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier(name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_INTEL_SLM,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"slm", M_INTEL_SLM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"avx2",   F_AVX2}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
					"__cpu_model");

  varpool_add_new_variable (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRS leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN ( DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
}
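
/* Usage sketch for the folded builtins (GCC extensions; any string not
   in the tables above is rejected at compile time with the errors this
   function emits):  */
#if 0
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();			/* fill __cpu_model */
  if (__builtin_cpu_is ("corei7"))		/* folds to a __cpu_type compare */
    puts ("running on a corei7");
  if (__builtin_cpu_supports ("sse4.2"))	/* folds to __cpu_features[0] & mask */
    puts ("sse4.2 available");
  return 0;
}
#endif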
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
	{
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}

/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
   */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
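
/* Usage sketch for the ABI-specific va builtins registered above
   (x86-64 only; "sum_ints" is an illustrative name).  An ms_abi
   function must use the ms_va_list flavor:  */
#if 0
__attribute__ ((ms_abi)) int
sum_ints (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, total = 0;

  __builtin_ms_va_start (ap, count);
  for (i = 0; i < count; i++)
    total += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return total;
}
#endif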
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
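
/* Usage sketch for the extended float types and TFmode builtins set up
   above (x86-specific GCC extensions; 'Q' is the __float128 literal
   suffix):  */
#if 0
__float128
finite_magnitude (__float128 x)
{
  if (__builtin_fabsq (x) == __builtin_infq ())
    return 1.0Q;
  return __builtin_fabsq (x);	/* expands to __fabstf2 without SSE */
}
#endif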
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
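/* Note on ix86_expand_sse_compare above: the comparison is materialized
   as an rtx of code d->comparison (e.g. (lt:V2DF op0 op1)) and passed as
   the insn's last operand; SWAP covers predicates such as GT/GE that SSE
   only provides through the mirrored LT/LE forms.  */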
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
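/* Note on the comi expansion above: zeroing a full SImode register and
   then writing only its QImode low part through STRICT_LOW_PART yields a
   correctly zero-extended result without a separate zero-extend insn and
   avoids a partial-register stall on the setcc output.  */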
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
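/* Note on the round expansion above: for the ROUND descriptors the
   d->comparison field does not hold an rtx_code; it carries the rounding
   immediate (ROUND_FLOOR, ROUND_CEIL, ...), which is why it is simply
   wrapped in GEN_INT and passed as the insn's last operand.  */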
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
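/* The _vec_pack_sfix variant above differs from ix86_expand_sse_round
   only in taking two source vectors, whose rounded values the insn
   converts and packs into a single vector of ints (e.g. two V2DF sources
   into one V4SI result).  */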
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
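/* Note on the ptest expansion above: the insn itself only sets the flags
   register; the returned value is built by testing FLAGS_REG with
   d->comparison (EQ for testz, LTU for testc, GTU for testnzc), using the
   same zero-then-set-low-part idiom as the comi case.  */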
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
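/* Note on the pcmpestr expansion above: the pattern computes every output
   at once, so PCMPESTRI128 keeps the index result (operand 0),
   PCMPESTRM128 keeps the mask result (operand 1), and the flag variants
   discard both scratch results and instead read the condition named by
   d->flag out of FLAGS_REG.  */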
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2UDI_FTYPE_V4USI_V4USI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
    case V4UDI_FTYPE_V8USI_V8USI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
      nargs = 4;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      real_target = gen_reg_rtx (tmode);
      target = simplify_gen_subreg (rmode, real_target, tmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be an 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;

    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
	  target = gen_rtx_MEM (tmode, op);
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || !register_operand (target, tmode)
	  || GET_MODE (target) != tmode)
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
	      op = gen_rtx_MEM (mode, op);
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
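/* E.g. for a __v4sf argument TYPE_VECTOR_SUBPARTS is 4, so MAX is 3 and a
   selector of 4 is diagnosed; the function then returns 0 so that
   expansion can continue with a valid element number.  */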
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
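/* E.g. _mm_set_pi32 from mmintrin.h expands to the V2SI init builtin
   handled above, each SImode argument becoming one element of the
   V2SImode PARALLEL.  */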
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
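/* E.g. _mm_insert_epi16 is implemented via the V8HI vec_set builtin
   handled above; the copy through a fresh TARGET keeps the user-visible
   source operand unmodified.  */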
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
		     enum machine_mode mode, int ignore)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
	/* Make it call __cpu_indicator_init in libgcc. */
	tree call_expr, fndecl, type;
	type = build_function_type_list (integer_type_node, NULL_TREE);
	fndecl = build_fn_decl ("__cpu_indicator_init", type);
	call_expr = build_call_expr (fndecl, 0);
	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
	gcc_assert (fold_expr != NULL_TREE);
	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    default:
      break;
    }

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:

      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
	{
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op2 = expand_normal (arg0);
	  if (!register_operand (op2, SImode))
	    op2 = copy_to_mode_reg (SImode, op2);

	  insn = (TARGET_64BIT
		  ? gen_rdpmc_rex64 (op0, op1, op2)
		  : gen_rdpmc (op0, op2));
	  emit_insn (insn);
	}
      else if (fcode == IX86_BUILTIN_RDTSC)
	{
	  insn = (TARGET_64BIT
		  ? gen_rdtsc_rex64 (op0, op1)
		  : gen_rdtsc (op0));
	  emit_insn (insn);
	}
      else
	{
	  op2 = gen_reg_rtx (SImode);

	  insn = (TARGET_64BIT
		  ? gen_rdtscp_rex64 (op0, op1, op2)
		  : gen_rdtscp (op0, op2));
	  emit_insn (insn);

	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op4 = expand_normal (arg0);
	  if (!address_operand (op4, VOIDmode))
	    {
	      op4 = convert_memory_address (Pmode, op4);
	      op4 = copy_addr_to_reg (op4);
	    }
	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
	}

      if (target == 0)
	{
	  /* mode is VOIDmode if __builtin_rd* has been called
	     without lhs.  */
	  if (mode == VOIDmode)
	    return target;
	  target = gen_reg_rtx (mode);
	}

      if (TARGET_64BIT)
	{
	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
				     op1, 1, OPTAB_DIRECT);
	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
				     op0, 1, OPTAB_DIRECT);
	}

      emit_move_insn (target, op0);
      return target;

    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
      switch (fcode)
	{
	case IX86_BUILTIN_FXSAVE:
	  icode = CODE_FOR_fxsave;
	  break;
	case IX86_BUILTIN_FXRSTOR:
	  icode = CODE_FOR_fxrstor;
	  break;
	case IX86_BUILTIN_FXSAVE64:
	  icode = CODE_FOR_fxsave64;
	  break;
	case IX86_BUILTIN_FXRSTOR64:
	  icode = CODE_FOR_fxrstor64;
	  break;
	default:
	  gcc_unreachable ();
	}

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return 0;

    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
	{
	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave_rex64;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor_rex64;
	      break;
	    case IX86_BUILTIN_XSAVE64:
	      icode = CODE_FOR_xsave64;
	      break;
	    case IX86_BUILTIN_XRSTOR64:
	      icode = CODE_FOR_xrstor64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT64:
	      icode = CODE_FOR_xsaveopt64;
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  op2 = gen_lowpart (SImode, op2);
	  op1 = gen_lowpart (SImode, op1);
	  pat = GEN_FCN (icode) (op0, op1, op2);
	}
      else
	{
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  pat = GEN_FCN (icode) (op0, op1);
	}

      if (pat)
	emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;

    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}

    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;

    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

rdseed_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));

      if (target == 0)
	target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;

    case IX86_BUILTIN_ADDCARRYX32:
      icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
      mode0 = DImode;

addcarryx:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op0 = gen_reg_rtx (QImode);

      /* Generate CF from input operand.  */
      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
      emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));

      /* Gen ADCX instruction to compute X+Y+CF.  */
      op2 = expand_normal (arg1);
      op3 = expand_normal (arg2);

      if (!REG_P (op2))
	op2 = copy_to_mode_reg (mode0, op2);
      if (!REG_P (op3))
	op3 = copy_to_mode_reg (mode0, op3);

      op0 = gen_reg_rtx (mode0);

      op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));

      /* Store the result.  */
      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
	{
	  op4 = convert_memory_address (Pmode, op4);
	  op4 = copy_addr_to_reg (op4);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      /* Return current CF value.  */
      if (target == 0)
	target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
      return target;

    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;

    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode)
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
	  || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
	       || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
	  rtx half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      unsigned int negative = 0;
	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
		{
		  tree cst = VECTOR_CST_ELT (arg3, i);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;

    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }

  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE)
	    /* Emit a normal call if SSE isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
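/* Note on the dispatch loops above: builtins with no special-case
   expansion are located by a linear scan of the bdesc_* descriptor
   tables keyed on d->code, so every remaining fcode must be present in
   one of the tables, otherwise the final gcc_unreachable () triggers.  */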
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
	}
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
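
/* Illustrative sketch (not GCC source): the out_n/in_n pairs tested
   above are simply vector width divided by element width, so a 128-bit
   vector of 64-bit DFmode elements has 2 subparts and a 256-bit one
   has 4, matching the SQRTPD vs. SQRTPD256 split.  */

static unsigned
example_vector_subparts (unsigned vector_bits, unsigned element_bits)
{
  return vector_bits / element_bits;  /* 128/64 = 2, 256/64 = 4, 256/32 = 8 */
}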
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
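
/* Illustrative sketch (not GCC source): how the SVML names are formed.
   `bname + 10' above skips the 10-character "__builtin_" prefix, so
   "__builtin_sinf" yields "vmlssinf", whose trailing 'f' is replaced by
   the lane count and whose first name letter is uppercased, giving
   "vmlsSin4".  The sketch reproduces the n == 4 (single-precision)
   branch without library calls; `buf' must hold at least 20 bytes,
   matching the `name[20]' buffer above.  */

static void
example_svml_name (char buf[20], const char *builtin_name)
{
  int i = 0;
  const char *p = builtin_name + 10;  /* skip "__builtin_" */

  buf[i++] = 'v'; buf[i++] = 'm'; buf[i++] = 'l'; buf[i++] = 's';
  while (*p)
    buf[i++] = *p++;
  buf[i - 1] = '4';                   /* overwrite trailing 'f' with lane count */
  buf[i] = '\0';
  buf[4] = (char) (buf[4] & ~0x20);   /* uppercase the first name letter */
}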
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
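
/* Illustrative sketch (not GCC source): the ACML names start from the
   "__vr.._" template above; name[4] and name[5] are patched to "d2" or
   "s4" and the builtin's own name (past "__builtin_") is appended at
   offset 7, producing e.g. "__vrd2_sin" and "__vrs4_sinf".  */

static void
example_acml_name (char buf[20], const char *builtin_name,
		   int single_precision)
{
  int i = 0;
  const char *p = "__vr";

  while (*p)
    buf[i++] = *p++;
  buf[i++] = single_precision ? 's' : 'd';  /* element type */
  buf[i++] = single_precision ? '4' : '2';  /* lane count */
  buf[i++] = '_';
  for (p = builtin_name + 10; *p; )         /* skip "__builtin_" */
    buf[i++] = *p++;
  buf[i] = '\0';
}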
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
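
/* Illustrative sketch (not GCC source): the scale test above accepts
   exactly the hardware-encodable scales 1, 2, 4 and 8.  The expression
   `scale & (scale - 1)' clears the lowest set bit, so it is zero iff
   scale is a power of two.  */

static int
example_valid_gather_scale (int scale)
{
  return scale > 0 && scale <= 8 && (scale & (scale - 1)) == 0;
}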
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
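
/* Illustrative sketch (not GCC source): the _NR builtin variants refine
   the low-precision RSQRTPS estimate with a Newton-Raphson step, which
   is why the substitution above is gated on unsafe, finite-only,
   non-trapping math.  Given an estimate y of 1/sqrt(x), one refinement
   step is:  */

static float
example_rsqrt_nr_step (float x, float y)
{
  return y * (1.5f - 0.5f * x * y * y);
}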
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
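
/* Illustrative sketch (not GCC source): the 128-bit V4SF loop above
   packs each 2-bit lane selector into the imm8, low lane first, so the
   selector {3,2,1,0} encodes as 0x1b.  */

static unsigned
example_vpermilps_imm8 (const unsigned char sel[4])
{
  unsigned mask = 0, i;

  for (i = 0; i < 4; ++i)
    mask |= (unsigned) (sel[i] & 3) << (i * 2);  /* nelt / 2 == 2 bits per lane */
  return mask;                                   /* {3,2,1,0} -> 0x1b */
}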
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
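
/* Illustrative sketch (not GCC source): after validating that each half
   of the parallel is a contiguous run, only the first element of each
   half matters.  Dividing it by nelt2 yields the 128-bit source lane,
   placed in bits 0-3 resp. 4-7 of the imm8; selecting the high lane of
   each operand of a V8SF permute, for instance, gives 0x31.  */

static unsigned
example_vperm2f128_imm8 (unsigned first_lo, unsigned first_hi, unsigned nelt2)
{
  return (first_lo / nelt2) | ((first_hi / nelt2) << 4);
  /* nelt2 = 4, first_lo = 4, first_hi = 12 -> 0x31 */
}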
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Return a register priority for hard reg REGNO.  */
static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always wants a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == 0)
    return 4;
  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && (MAYBE_MASK_CLASS_P (rclass)
	  || (!TARGET_64BIT && !in_p
	      && INTEGER_CLASS_P (rclass)
	      && MAYBE_NON_Q_CLASS_P (rclass))))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                              (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

       Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
       reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
       SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
       reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
       reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
       reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
      case BND_REGS:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}
bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
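
/* Illustrative sketch (not GCC source): outside the XFmode/XCmode
   special cases, the integer-class answer above is a rounded-up
   division.  With 4-byte words (-m32), a 12-byte XFmode value needs
   (12 + 3) / 4 = 3 registers, matching the hard-wired `3'.  */

static unsigned
example_words_needed (unsigned mode_size, unsigned units_per_word)
{
  return (mode_size + units_per_word - 1) / units_per_word;
}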
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	  case SFmode:
	    index = 0;
	    break;
	  case DFmode:
	    index = 1;
	    break;
	  case XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  case 16:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (Q_CLASS_P (regclass) || TARGET_64BIT)
	  {
	    if (!in)
	      return ix86_cost->int_store[0];
	    if (TARGET_PARTIAL_REG_DEPENDENCY
		&& optimize_function_for_speed_p (cfun))
	      cost = ix86_cost->movzbl_load;
	    else
	      cost = ix86_cost->int_load[0];
	    if (in == 2)
	      return MAX (cost, ix86_cost->int_store[0]);
	    return cost;
	  }
	else
	  {
	    if (in == 2)
	      return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	    if (in)
	      return ix86_cost->movzbl_load;
	    else
	      return ix86_cost->int_store[0] + 4;
	  }
	break;
      case 2:
	if (in == 2)
	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	if (in == 2)
	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
	else if (in)
	  cost = ix86_cost->int_load[2];
	else
	  cost = ix86_cost->int_store[2];
	return (cost * (((int) GET_MODE_SIZE (mode)
			 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
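
/* Illustrative sketch (not GCC source): the IN parameter convention
   above: 0 prices a store, 1 a load, and 2 the worse of the two (used
   when a value must travel both directions through memory).  */

static int
example_memory_move_cost (int load_cost, int store_cost, int in)
{
  if (in == 2)
    return load_cost > store_cost ? load_cost : store_cost;
  return in ? load_cost : store_cost;
}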
static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    return VALID_MASK_REG_MODE (mode);
  if (BND_REGNO_P (regno))
    return VALID_BND_REG_MODE (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && (mode == XImode
	      || VALID_AVX512F_REG_MODE (mode)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode move is available only when AVX is enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
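
/* Illustrative sketch (not GCC source): the final formula above is a
   rounded-up division in COSTS_N_INSNS units, so a 32-byte AVX mode
   moved through 16-byte units costs COSTS_N_INSNS (2).  The sketch
   assumes COSTS_N_INSNS (N) expands to (N) * 4, as this file's size
   costs do.  */

static int
example_set_reg_reg_cost (unsigned mode_size, unsigned units)
{
  unsigned pieces = (mode_size + units - 1) / units;
  return 4 * (int) pieces;
}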
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;
    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  return true;
	}
      switch (standard_80387_constant_p (x))
	{
	case 1: /* 0.0 */
	  *total = 1;
	  return true;
	default: /* Other constants */
	  *total = 2;
	  return true;
	case 0:
	case -1:
	  break;
	}
      if (SSE_FLOAT_MODE_P (mode))
	{
	  switch (standard_sse_constant_p (x))
	    {
	    case 0:
	      break;
	    case 1:  /* 0: xor eliminates false dependency */
	      *total = 0;
	      return true;
	    default:  /* -1: cmp contains false dependency */
	      *total = 1;
	      return true;
	    }
	}
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;
    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  /* V*QImode is emulated with 1-11 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int count = 11;
	      if (TARGET_XOP && mode == V16QImode)
		{
		  /* For XOP we use vpshab, which requires a broadcast of the
		     value to the variable shift insn.  For constants this
		     means a V16Q const in mem; even when we can perform the
		     shift with one insn set the cost to prefer paddb.  */
		  if (CONSTANT_P (XEXP (x, 1)))
		    {
		      *total = (cost->fabs
				+ rtx_cost (XEXP (x, 0), code, 0, speed)
				+ (speed ? 2 : COSTS_N_BYTES (16)));
		      return true;
		    }
		  count = 3;
		}
	      else if (TARGET_SSSE3)
		count = 7;
	      *total = cost->fabs * count;
	    }
	  else
	    *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
	    {
	      /* Return the cost after shift-and truncation.  */
	      *total = cost->shift_var;
	      return true;
	    }
	  else
	    *total = cost->shift_var;
	}
      return false;
    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }
    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* V*QImode is emulated with 7-13 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int extra = 11;
	      if (TARGET_XOP && mode == V16QImode)
		extra = 5;
	      else if (TARGET_SSSE3)
		extra = 6;
	      *total = cost->fmul * 2 + cost->fabs * extra;
	    }
	  /* V*DImode is emulated with 5-8 insns.  */
	  else if (mode == V2DImode || mode == V4DImode)
	    {
	      if (TARGET_XOP && mode == V2DImode)
		*total = cost->fmul * 2 + cost->fabs * 3;
	      else
		*total = cost->fmul * 3 + cost->fabs * 5;
	    }
	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
	     insns, including two PMULUDQ.  */
	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	    *total = cost->fmul * 2 + cost->fabs * 5;
	  else
	    *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}
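
    /* Illustrative sketch (not GCC source): the nbits loop in the MULT
       case above is Kernighan's population count.  `value &= value - 1'
       clears the lowest set bit per iteration, so the loop runs once
       per set bit of the multiplier:

	 static int
	 example_popcount (unsigned long value)
	 {
	   int nbits;
	   for (nbits = 0; value != 0; value &= value - 1)
	     nbits++;
	   return nbits;
	 }
    */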
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      return false;
    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */
    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->fabs;
      return true;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Extended REX SSE registers.  */
   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Mask register.  */
   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
     reg_alloc_order [pos++] = i;

   /* MPX bound registers.  */
   for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	error ("ms_abi and sysv_abi attributes are not compatible");

      return NULL_TREE;
    }

  return NULL_TREE;
}
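/* Usage sketch (an illustration, not part of the original sources):

     void f (void) __attribute__ ((ms_abi));
     void g (void) __attribute__ ((sysv_abi));

   Applying both attributes to one declaration is rejected with the
   error emitted above.  */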
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;

  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
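/* Summary of the 32-bit cases above (added commentary): fastcall passes
   this in %ecx, or in %edx when a hidden aggregate-return pointer
   occupies %ecx; thiscall also uses %ecx but falls back to the 4(%esp)
   stack slot for aggregate returns; without register parameters the
   this pointer is always on the stack.  */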
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT
	  && !x86_64_general_operand (delta_rtx, Pmode))
	{
	  tmp = gen_rtx_REG (Pmode, tmp_regno);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
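/* As an illustration (added commentary, not generated output): a 32-bit
   thunk with DELTA == -4 and no VCALL_OFFSET boils down to

	addl	$-4, 4(%esp)	# adjust THIS in its stack slot
	jmp	target		# sibcall the real method

   which is why only one scratch register is ever needed here.  */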
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
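/* For example (added commentary): in a struct with a double member, the
   traditional ix86 ABI implemented above caps the member's alignment at
   32 bits, so the double may sit at a 4-byte boundary rather than the
   8-byte boundary used when -malign-double or 64-bit mode is in
   effect.  */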
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
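/* As an example (added commentary): for 64-bit non-PIC code with profile
   counters disabled, the function entry simply receives

	call	mcount

   where the actual symbol comes from MCOUNT_NAME, or from
   MCOUNT_NAME_BEFORE_PROLOGUE under -mfentry.  */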
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  break;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    while (nbytes + max_skip >= 16)
	      {
		start = NEXT_INSN (start);
		if (JUMP_P (start) || CALL_P (start))
		  njumps--, isjump = 1;
		else
		  isjump = 0;
		nbytes -= min_insn_size (start);
	      }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if (JUMP_P (insn) || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if (JUMP_P (start) || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
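/* Worked example (added commentary): if INSN is the fourth jump/call in
   the current interval and START..INSN spans nbytes == 12 with
   min_insn_size (INSN) == 2, the pad emitted above is 15 - 12 + 2 = 5
   bytes, which pushes INSN past the 16-byte boundary so at most three of
   the four branches can share one 16-byte page.  */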
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx insn, next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next)
	    && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
		|| NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;

  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
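/* Examples (added commentary): a constant of -4 is negated so
   'addl $-4, %eax' can be printed as 'subl $4, %eax'; -128 is left
   alone and +128 is turned into -128, because -128 fits a sign-extended
   imm8 encoding while +128 would require a full imm32.  */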
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
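/* How the negative path works (added commentary): for an input with the
   sign bit set, (in >> 1) | (in & 1) halves the value while keeping the
   low bit sticky so float rounding stays correct; converting that halved
   value, which is now in signed range, and doubling it with f0 + f0
   reproduces the full unsigned value.  */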
/* AVX512F does support 64-byte integer vector operations,
   thus the longest vector we are faced with is V64QImode.  */
#define MAX_VECT_LEN	64

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
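/* For instance (added commentary), V16QImode widens to V8HImode: the
   same 16-byte vector, half as many elements, each twice as wide.  */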
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;

	    /* If that fails, force VAL into a register.  */
	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
	  emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_reg_rtx (wvmode);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES_TO_VEC
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

    half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
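/* Strategy summary (added commentary): the all-constant case is loaded
   straight from the constant pool; an all-identical vector is broadcast;
   a single variable element amid zeros or other constants goes through
   the one_nonzero/one_var helpers; everything else falls through to the
   fully general expander above.  */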
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  rtx t = gen_reg_rtx (V4SFmode);
	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
	  emit_move_insn (target, gen_lowpart (mode, t));
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

    half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem, d = dest;

  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      d = gen_reg_rtx (V1TImode);
      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	{
	  if (GET_MODE (dest) != V4DImode)
	    d = gen_reg_rtx (V4DImode);
	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
				   gen_lowpart (V4DImode, src),
				   const1_rtx);
	}
      else
	{
	  d = gen_reg_rtx (V2TImode);
	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
				    GEN_INT (i / 2));
	}
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
  if (d != dest)
    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
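/* For example (added commentary), a V4SFmode maximum reduction runs the
   loop twice: first the upper 64 bits are folded onto the lower 64 and
   combined with FN, then the remaining two elements are folded once
   more, leaving the scalar result in element 0 of DEST.  */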
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
		     CONST_DOUBLE_FROM_REAL_VALUE (
			REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
			XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
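/* Why the 0.2928... cutoff (added commentary): the x87 fyl2xp1
   instruction is only specified for |x| < 1 - sqrt(2)/2 ~= 0.29289,
   exactly the range where computing log(1 + x) directly suffers the
   worst cancellation; inside that range the code uses fyl2xp1, and
   outside it falls back to fyl2x on the explicitly formed 1 + x.  */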

/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
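
/* Illustrative sketch (not part of the compiler): the sequence above
   implements round(a) = sgn(a) * floor(fabs(a) + 0.5), with fxam
   supplying the sign bit so that negative inputs negate the floored
   magnitude.  A plain C model:

     double model_round (double a)
     {
       double r = floor (fabs (a) + 0.5);
       return signbit (a) ? -r : r;
     }
*/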

/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));

  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
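
/* Illustrative sketch (not part of the compiler): the sequence above is
   one Newton-Raphson step on f(x) = 1/x - b, i.e. x1 = x0 * (2 - b * x0),
   written as (x0 + x0) - b * x0 * x0 to expose independent multiplies.
   A scalar model, where the initial reciprocal stands in for the rcpss
   estimate:

     float model_swdiv (float a, float b)
     {
       float x0 = 1.0f / b;                    // rcpss estimate in reality
       float x1 = (x0 + x0) - (b * x0 * x0);   // one refinement step
       return a * x1;
     }
*/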

/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
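
/* Illustrative sketch (not part of the compiler): with x0 ~= 1/sqrt(a),
   the update -0.5 * x0 * (a * x0 * x0 - 3.0) is the Newton-Raphson step
   x0 * (1.5 - 0.5 * a * x0 * x0) for f(x) = 1/x^2 - a; multiplying by a
   instead of x0 yields sqrt(a).  A scalar model, where the initial value
   stands in for the rsqrtss estimate:

     float model_rsqrt (float a)
     {
       float x0 = 1.0f / sqrtf (a);   // rsqrtss estimate in reality
       return -0.5f * x0 * (a * x0 * x0 - 3.0f);
     }
*/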

#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
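
/* Illustrative sketch (not part of the compiler): the two insns emitted
   above compute result = abs_value | (sign & signbit_mask), the usual
   bit-level form of copysign onto a known-positive value.  A model on
   raw single precision bit patterns:

     uint32_t model_copysign_bits (uint32_t abs_bits, uint32_t sign_bits)
     {
       const uint32_t signbit = 0x80000000u;
       return abs_bits | (sign_bits & signbit);
     }
*/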

/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }

  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}

/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}

/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
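
/* Illustrative sketch (not part of the compiler): every IEEE double at
   or above 2^52 is an integer, so x + 2^52 - 2^52 rounds a nonnegative
   x below 2^52 to an integer in the current rounding mode.  A scalar
   model (volatile keeps the compiler from folding the trick away):

     double model_two52_round (double x)   // assumes 0 <= x < 2^52
     {
       const double TWO52 = 4503599627370496.0;   // 2**52
       volatile double t = x + TWO52;
       return t - TWO52;
     }
*/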

/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
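
/* Illustrative sketch (not part of the compiler): using exactly 0.5
   would round values just below 0.5 upward after the addition, so the
   constant is nextafter (0.5, 0.0) = 0.5 - 2^(-p-1), the largest
   representable value below 0.5.  A scalar model for DFmode:

     long model_lround (double x)
     {
       double adj = copysign (0.5 - 0x1p-54, x);  // pred (0.5) for double
       return (long) (x + adj);                   // truncating conversion
     }
*/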

/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
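
/* Illustrative sketch (not part of the compiler): the conversion rounds
   toward zero, so only negative non-integers need the compensation step
   for floor (and positive non-integers for ceil).  A scalar model of
   the floor case:

     long model_lfloor (double x)
     {
       long xi = (long) x;      // truncates toward zero
       if ((double) xi > x)     // true only for negative non-integers
         xi -= 1;
       return xi;
     }
*/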

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
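
/* Illustrative sketch (not part of the compiler): a scalar model of the
   expansion above.  Working on fabs(x) and copying the sign back makes
   the 2^52 trick valid for negative inputs and preserves -0.0:

     double model_rint (double x)
     {
       const double TWO52 = 0x1p52;
       double xa = fabs (x);
       if (!(xa < TWO52))      // NaN or magnitude already integral
         return x;
       xa = xa + TWO52 - TWO52;
       return copysign (xa, x);
     }
*/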

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
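
/* Illustrative sketch (not part of the compiler): a scalar model of the
   SSE4.1 expansion, round (a) = trunc (a + copysign (pred (0.5), a)):

     double model_round_sse4 (double a)
     {
       double e1 = copysign (0.5 - 0x1p-54, a);  // nextafter (0.5, 0.0)
       return trunc (a + e1);                    // roundsd, ROUND_TRUNC
     }
*/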

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },

  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}

/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
                                                        const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
                unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt,
                        bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            if (target != d->target)
              emit_move_insn (d->target, gen_lowpart (d->vmode, target));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_reg_rtx (vmode);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_reg_rtx (vmode);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}

/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
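
/* Illustrative worked example (not from the sources): the V16QImode
   permutation { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } moves
   aligned, consecutive chunks of 4 bytes, so it is also a valid
   V4SImode permutation { 1, 0, 3, 2 }.  The two checks above verify
   exactly that: each chunk starts on a multiple of the chunk size and
   its elements stay consecutive.  */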

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = d->target;
              if (d->vmode != V4DImode)
                target = gen_reg_rtx (V4DImode);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
                           || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              if (target != d->target)
                emit_move_insn (d->target, gen_lowpart (d->vmode, target));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  target = gen_reg_rtx (V4DImode);
                  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
                                      perm, 4, false))
                    {
                      emit_move_insn (d->target,
                                      gen_lowpart (d->vmode, target));
                      return true;
                    }
                  return false;
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SImode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = d->target;
  if (d->vmode != vmode)
    target = gen_reg_rtx (vmode);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          perm2[i] = d->perm[i] & mask;
          if (perm2[i] != i)
            identity_perm = false;
          if (perm2[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
              break;
            case V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            case V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, d->op0));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                      d->testing_p))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                              d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift, target;
  struct expand_vec_perm_d dcopy;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  dcopy = *d;
  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  target = gen_reg_rtx (TImode);
  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
  dcopy.one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = dcopy.perm[i] - min;
      if (e != i)
        in_order = false;
      dcopy.perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    {
      emit_move_insn (d->target, dcopy.op0);
      return true;
    }

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok);

  return ok;
}
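
/* Illustrative worked example (not from the sources): if a V16QImode
   permutation selects bytes 5 .. 20 of the op1:op0 pair, then min = 5
   and max = 20 fit within one 16-byte window, so palignr shifts the
   pair down by 5 bytes and the residual single-operand permutation
   becomes { 0, 1, ..., 15 }, i.e. the identity handled by the
   degenerate case above, or at worst a single pshufb.  */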

static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dremap.target = gen_reg_rtx (dremap.vmode);
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	ok = expand_vec_perm_1 (&dsecond);
      else
	ok = false;

      if (ok)
	{
	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}
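/* For reference, a scalar model of how a vperm2[fi]128 control byte
   selects lanes; the helper below is illustrative only and not part of
   GCC.  Each 128-bit lane of the result (modelled here as one value)
   is chosen by a 2-bit field: 0/1 pick a lane of the first source,
   2/3 a lane of the second.  With this model, ((perm << 2) | perm)
   & 0x33 from the loop above places the PERM search value into the
   two selector fields of the immediate.

     static void
     vperm2x128_model (const unsigned src1[2], const unsigned src2[2],
		       unsigned dst[2], unsigned char imm)
     {
       unsigned sel0 = imm & 3;		// selector for result lane 0
       unsigned sel1 = (imm >> 4) & 3;	// selector for result lane 1
       dst[0] = sel0 < 2 ? src1[sel0] : src2[sel0 - 2];
       dst[1] = sel1 < 2 ? src1[sel1] : src2[sel1 - 2];
     }
*/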
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (which)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = d->target;
  if (d->vmode != V16QImode)
    op = gen_reg_rtx (V16QImode);
  emit_insn (gen_iorv16qi3 (op, l, h));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
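/* A scalar model of the two-mask trick above (illustrative only, not
   used by the compiler): pshufb yields zero for any mask byte with
   bit 7 set, so shuffling each source with a mask that blanks the
   other source's elements lets a plain OR merge the two halves.

     static void
     pshufb_model (const unsigned char src[16], const signed char mask[16],
		   unsigned char dst[16])
     {
       int i;
       for (i = 0; i < 16; i++)
	 dst[i] = mask[i] < 0 ? 0 : src[mask[i] & 15];
     }

   Running pshufb_model once per operand with the rperm[0]/rperm[1]
   masks built above and OR-ing the two results reproduces the
   requested permutation.  */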
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_reg_rtx (V4DImode);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
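/* To make the mask layout described above concrete, a sketch (not part
   of the expansion itself) that builds the first V32QImode extract-even
   mask exactly as documented: in-lane even byte indexes, with -128
   marking the bytes that must come from the other operand.

     static void
     build_even_mask0 (signed char mask[32])
     {
       int i;
       for (i = 0; i < 32; i++)
	 {
	   int quarter = i / 8;
	   // Quarters 0 and 2 take even bytes 0, 2, ..., 0xe of op0
	   // (indexes masked with 0xf); quarters 1 and 3 are zeroed
	   // by setting bit 7 (-128).
	   mask[i] = (quarter & 1) ? -128 : (signed char) ((i * 2) & 0xf);
	 }
     }
*/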
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4, t5;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_reg_rtx (V4DFmode);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
	    {
	      emit_move_insn (d->target,
			      gen_lowpart (V4DImode, d_copy.target));
	      return true;
	    }
	  return false;
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_reg_rtx (V8SFmode);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
	    {
	      emit_move_insn (d->target,
			      gen_lowpart (V8SImode, d_copy.target));
	      return true;
	    }
	  return false;
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);
      t3 = gen_reg_rtx (V4DImode);
      t4 = gen_reg_rtx (V4DImode);
      t5 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0, dest;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      dest = gen_reg_rtx (V4SImode);
      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
      gcc_assert (ok);
      emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
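/* An example of the folding above (illustrative): for a V4SI vector
   with identical operands, the selector { 0, 5, 2, 7 } nominally
   references "both" inputs, but masking each index with nelt - 1
   turns it into the equivalent single-input selector { 0, 1, 2, 3 }:

     unsigned char perm[4] = { 0, 5, 2, 7 };
     int i;
     for (i = 0; i < 4; i++)
       perm[i] &= 4 - 1;	// now { 0, 1, 2, 3 }
*/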
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used an full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
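/* A scalar model of the QImode-via-HImode expansion above (a sketch,
   not part of the expander): each byte is widened into a 16-bit slot
   by the interleave, the operation runs at word width, and the low
   byte of every word is exactly the byte result that the final
   even-element permutation collects.

     static unsigned char
     qi_op_via_hi (unsigned char a, unsigned char b)
     {
       unsigned short wa = a, wb = b;	// low bytes of interleaved words
       return (unsigned char) (wa * wb);	// low byte == QImode product
     }
*/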
/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
   if op is CONST_VECTOR with all odd elements equal to their
   preceding element.  */

static bool
const_vector_equal_evenodd_p (rtx op)
{
  enum machine_mode mode = GET_MODE (op);
  int i, nunits = GET_MODE_NUNITS (mode);
  if (GET_CODE (op) != CONST_VECTOR
      || nunits != CONST_VECTOR_NUNITS (op))
    return false;
  for (i = 0; i < nunits; i += 2)
    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
      return false;
  return true;
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;
  rtx orig_op1 = op1, orig_op2 = op2;

  if (!nonimmediate_operand (op1, mode))
    op1 = force_reg (mode, op1);
  if (!nonimmediate_operand (op2, mode))
    op2 = force_reg (mode, op2);

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
	 signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      if (!const_vector_equal_evenodd_p (orig_op1))
	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			    x, NULL, 1, OPTAB_DIRECT);
      if (!const_vector_equal_evenodd_p (orig_op2))
	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			    x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
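/* The identity behind the PMULDQ-less path above, written as scalar C
   (a sketch; the helper name is illustrative): treating the sign mask
   of each operand as the "high part" of its sign extension, a signed
   32x32->64 multiply is the unsigned product plus the two cross
   highpart*lowpart terms shifted into the upper half.

     static long long
     smul32_widen (int a, int b)
     {
       unsigned int ua = (unsigned int) a, ub = (unsigned int) b;
       unsigned int sa = a < 0 ? -1u : 0;	// highpart of sign-extended a
       unsigned int sb = b < 0 ? -1u : 0;	// highpart of sign-extended b
       unsigned long long lo = (unsigned long long) ua * ub;
       unsigned long long hi = ((unsigned long long) sa * ub
				+ (unsigned long long) sb * ua) << 32;
       return (long long) (lo + hi);	// wraps mod 2^64, as the insns do
     }
*/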
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}
      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      t3 = gen_reg_rtx (mode);
      ix86_expand_vec_interleave (t3, t1, t2, high_p);
      emit_move_insn (dest, gen_lowpart (wmode, t3));
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2, res_3, res_4;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  res_3 = gen_reg_rtx (V2DImode);
  res_4 = gen_reg_rtx (V2DImode);
  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
	 us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
	 put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
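/* The recombination above relies on this scalar identity (a sketch,
   shown for reference only): the low 32 bits of each 32x32->64
   widening product are exactly the SImode product, so collecting the
   low halves of the even and odd 64-bit results and interleaving them
   restores all four V4SImode products.

     static unsigned int
     mul32_low_via_widen (unsigned int a, unsigned int b)
     {
       unsigned long long wide = (unsigned long long) a * b;
       return (unsigned int) wide;	// low half == a * b mod 2^32
     }
*/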
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
				    GEN_INT (1),
				    GEN_INT (0),
				    GEN_INT (3),
				    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
	{
	  umul = gen_vec_widen_umult_even_v4si;
	  nmode = V4SImode;
	}
      else if (mode == V4DImode)
	{
	  umul = gen_vec_widen_umult_even_v8si;
	  nmode = V8SImode;
	}
      else
	gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MULT (mode, op1, op2));
}
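/* Scalar form of the decomposition used on the non-XOP path above
   (a sketch; the helper is illustrative): with a = ah*2^32 + al and
   b = bh*2^32 + bl, a*b mod 2^64 = al*bl + ((ah*bl + bh*al) << 32),
   i.e. one low-multiply plus the two shifted cross products; the
   ah*bh term vanishes modulo 2^64.

     static unsigned long long
     mul64_from_32 (unsigned long long a, unsigned long long b)
     {
       unsigned long long al = a & 0xffffffffu, ah = a >> 32;
       unsigned long long bl = b & 0xffffffffu, bh = b >> 32;
       return al * bl + ((ah * bl + bh * al) << 32);
     }
*/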
/* Return 1 if control transfer instruction INSN
   should be encoded with bnd prefix.
   If insn is NULL then return 1 when control
   transfer instructions should be prefixed with
   bnd by default for current function.  */

bool
ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED)
{
  return false;
}
/* Calculate integer abs() using only SSE2 instructions.  */

void
ix86_expand_sse2_abs (rtx target, rtx input)
{
  enum machine_mode mode = GET_MODE (target);
  rtx tmp0, tmp1, x;

  switch (mode)
    {
      /* For 32-bit signed integer X, the best way to calculate the absolute
	 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
      case V4SImode:
	tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
				    GEN_INT (GET_MODE_BITSIZE
					     (GET_MODE_INNER (mode)) - 1),
				    NULL, 0, OPTAB_DIRECT);
	tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
				    NULL, 0, OPTAB_DIRECT);
	x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
				 target, 0, OPTAB_DIRECT);
	break;

      /* For 16-bit signed integer X, the best way to calculate the absolute
	 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
      case V8HImode:
	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

	x = expand_simple_binop (mode, SMAX, tmp0, input,
				 target, 0, OPTAB_DIRECT);
	break;

      /* For 8-bit signed integer X, the best way to calculate the absolute
	 value of X is min ((unsigned char) X, (unsigned char) (-X)),
	 as SSE2 provides the PMINUB insn.  */
      case V16QImode:
	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

	x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
				 target, 0, OPTAB_DIRECT);
	break;

      default:
	gcc_unreachable ();
    }

  if (x != target)
    emit_move_insn (target, x);
}
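/* The three identities used above, in scalar form (illustrative only):

     int abs_shift_xor (int x)		// the V4SImode path
     {
       int m = x >> 31;			// 0 or -1 (arithmetic shift)
       return (x ^ m) - m;
     }

     short abs_smax (short x)		// the V8HImode path: max (x, -x)
     {
       return x > -x ? x : (short) -x;
     }

     unsigned char abs_umin (signed char x)	// the V16QImode path
     {
       unsigned char ux = (unsigned char) x, nx = (unsigned char) -x;
       return ux < nx ? ux : nx;	// min ((uchar) x, (uchar) -x)
     }
*/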
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);
	rtx d;

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	d = dst;
	if (GET_MODE (dst) != dstmode)
	  d = gen_reg_rtx (dstmode);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
			  GEN_INT (1 << pos)));
	if (d != dst)
	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
	return true;
      }

    default:
      return false;
    }
}
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path {
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;            /* Number of insn in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number between 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
  return;
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}

/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

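/* Added example (follows directly from the code above): an insn with one
   SImode-representable immediate and one 64-bit-only immediate yields
   *imm == 2, *imm32 == 1, *imm64 == 1, and a returned total size of
   1 * 4 + 1 * 8 == 12 bytes.  */
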
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}

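/* Added note: the classification above is ordered -- memory form first
   (get_mem_group), then branch, compare, immediate and prefetch; an insn
   matching none of these dispatches unrestricted as disp_no_group.  */
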
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}

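/* Added note: BIG acts as an "exceeds any allowance" sentinel here; the
   caller (fits_dispatch_window) compares the returned count against
   num_allowable_groups[group], so BIG always fails that check while 1
   counts a single restricted insn.  */
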
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}

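/* Added example: under the 48-byte pair limit above, with window 0 holding
   30 bytes and window 1 holding 10 bytes (sum == 40), a 6-byte insn keeps
   the pair open (40 + 6 < 48), while an 8-byte insn reaches the limit and
   window 1 is treated as full.  */
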
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}

/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}

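/* Added note: the toggle above, window_num = ~window_num & 1, flips
   0 <-> 1 (~0 & 1 == 1 and ~1 & 1 == 0), so a full window always hands
   off to its sibling rather than to a third window.  */
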
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to STDOUT the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

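/* Added usage note: DEBUG_FUNCTION routines such as debug_ready_dispatch
   and debug_dispatch_window are meant to be invoked by hand from a
   debugger, e.g. "call debug_ready_dispatch ()" at a breakpoint inside
   the scheduler.  */
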
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}

/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}

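/* Added example: with a width of 2, the reassociation pass may rewrite a
   chain such as ((a + b) + c) + d into (a + b) + (c + d), exposing two
   independent additions that a 2-issue core can execute in parallel.  */
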
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}

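/* Added example: under -mavx (without -mprefer-avx128), SImode maps to
   V8SImode -- eight ints in one 256-bit vector; plain SSE targets get
   V4SImode instead, and DFmode falls back to word_mode when
   TARGET_VECTORIZE_DOUBLE is disabled.  */
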
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}

/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}

/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		    struct _stmt_vec_info *stmt_info, int misalign,
		    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
    count *= 50;  /* FIXME.  */

  retval = (unsigned) (count * stmt_cost);
  cost[where] += retval;

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
		  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

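/* Added note: the four vectorizer cost hooks form one lifecycle --
   ix86_init_cost allocates the three-slot accumulator, each
   ix86_add_stmt_cost call adds into the vect_prologue, vect_body or
   vect_epilogue slot, ix86_finish_cost reads the totals back, and
   ix86_destroy_cost_data frees the array.  */
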
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}

/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
				      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
	       "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}

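/* Added example (user-level view of the bits checked above, assuming the
   __ATOMIC_HLE_* macros the x86 front end provides): the HLE flags are
   combined with a sufficiently strong C11-style memory model, e.g.

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Mismatched combinations are diagnosed and rewritten to SEQ_CST as
   implemented above.  */
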
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || TARGET_SSE_MATH;
}

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"