/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "hash-table.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
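/* A minimal usage sketch (not part of the original file): MODE_INDEX maps
   a machine mode to a row of the five-entry multiply/divide cost arrays in
   struct processor_costs.  The helper name `example_mult_cost' is
   hypothetical, and the `mult_init' field name is assumed to match the
   declaration in i386.h; treat both as illustrative only.  */
#if 0
static int
example_mult_cost (const struct processor_costs *costs,
                   enum machine_mode mode)
{
  /* Entries 0..3 are QImode..DImode; entry 4 covers everything else.  */
  return costs->mult_init[MODE_INDEX (mode)];
}
#endif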
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
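/* Worked example of the scaling assumption above (an illustrative note,
   not original code): with COSTS_N_INSNS (N) == (N) * 4, a one-insn add
   costs COSTS_N_INSNS (1) == 4 in the time-based tables, while in the
   size-based table below the same two-byte add costs
   COSTS_N_BYTES (2) == 4.  The baseline add therefore gets the same value
   on both scales, which keeps relative costs comparable whether we
   optimize for speed or for size.  */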
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
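/* Illustrative sketch (not original code) of how tables like the ones
   above are read.  Each stringop_algs value pairs an algorithm for
   unknown block sizes with a list of {max, alg, noalign} entries for
   known sizes; max == -1 marks the final, unbounded entry.  The [2]
   arrays hold one strategy for 32-bit and one for 64-bit code.  The real
   selection logic lives in decide_alg later in this file;
   `example_pick_alg' is a hypothetical simplification of it.  */
#if 0
static enum stringop_alg
example_pick_alg (const stringop_algs *algs, HOST_WIDE_INT size)
{
  unsigned int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;  /* unreachable if the table is -1 terminated */
}
#endif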
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks the inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8.  Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* K8 has optimized REP instruction for medium sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, libcall can
   do non-temporal accesses and beat inline considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  k8_memcpy,
  k8_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  amdfam10_memcpy,
  amdfam10_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver1_memcpy,
  bdver1_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER2 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver2_memcpy,
  bdver2_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER3 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver3_memcpy,
  bdver3_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BDVER4 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */
  bdver4_memcpy,
  bdver4_memset,
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  btver1_memcpy,
  btver1_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  btver2_memcpy,
  btver2_memset,
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  pentium4_memcpy,
  pentium4_memset,
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
	     {100000, unrolled_loop, false}, {-1, libcall, false}}}};

static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
	     {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
	       in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
	       in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
		   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  8, /* size of l1 cache.  */
  1024, /* size of l2 cache.  */
  64, /* size of prefetch block */
  8, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3), /* cost of FABS instruction.  */
  COSTS_N_INSNS (3), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44), /* cost of FSQRT instruction.  */
  nocona_memcpy,
  nocona_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
	     {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  atom_memcpy,
  atom_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
	     {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs slm_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  slm_memcpy,
  slm_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  4, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
static stringop_algs intel_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs intel_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
	     {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
	     {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
struct processor_costs intel_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  intel_memcpy,
  intel_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  4, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Generic should produce code tuned for Core-i7 (and newer chips)
   and btver1 (and newer chips).  */

static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
	     {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
	     {-1, libcall, false}}}};
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
	     {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
	     {-1, libcall, false}}}};
struct processor_costs generic_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  generic_memcpy,
  generic_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* core_cost should produce code tuned for Core family of CPUs.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
	     {-1, libcall, false}}}};
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
	     {8192, rep_prefix_4_byte, true},
	     {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
	     {-1, libcall, false}}}};
struct processor_costs core_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* FIXME perhaps more appropriate value is 5.  */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  core_memcpy,
  core_memset,
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
#define m_INTEL (1<<PROCESSOR_INTEL)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
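/* Editor's illustration (not part of the original source): each m_*
   macro is a one-bit processor mask, so a tuning selector such as
   (m_CORE_ALL | m_GENERIC) enables a feature for every processor whose
   bit it contains.  The test applied in set_ix86_tune_features below
   boils down to the following: */
#if 0
  unsigned int ix86_tune_mask = 1u << ix86_tune;   /* current -mtune */
  ix86_tune_features[i]
    = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#endif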
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
};
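/* Editor's sketch (not part of the original source): translating a gcc
   register number into its SVR4 DWARF number is a plain table lookup,
   e.g. gcc regno 2 (%ecx) maps to DWARF register 1 as documented in
   the comment above.  */
#if 0
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];  /* map[2] == 1 (%ecx) */
}
#endif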
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

int const x86_64_ms_sysv_extra_clobbered_registers[12] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain			if ix86_static_chain_on_stack
   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
					<- sse_regs_save_offset
   [va_arg registers]			|
   [padding2]				| = to_allocate  */

struct ix86_frame
{
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);

#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const char *const name;		/* processor name */
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
};
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options while match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4", OPTION_MASK_ISA_FMA4 },
    { "-mfma", OPTION_MASK_ISA_FMA },
    { "-mxop", OPTION_MASK_ISA_XOP },
    { "-mlwp", OPTION_MASK_ISA_LWP },
    { "-mavx512f", OPTION_MASK_ISA_AVX512F },
    { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
    { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
    { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
    { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
    { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
    { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
    { "-msse4a", OPTION_MASK_ISA_SSE4A },
    { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3", OPTION_MASK_ISA_SSSE3 },
    { "-msse3", OPTION_MASK_ISA_SSE3 },
    { "-msse2", OPTION_MASK_ISA_SSE2 },
    { "-msse", OPTION_MASK_ISA_SSE },
    { "-m3dnow", OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx", OPTION_MASK_ISA_MMX },
    { "-mabm", OPTION_MASK_ISA_ABM },
    { "-mbmi", OPTION_MASK_ISA_BMI },
    { "-mbmi2", OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
    { "-mhle", OPTION_MASK_ISA_HLE },
    { "-mfxsr", OPTION_MASK_ISA_FXSR },
    { "-mrdseed", OPTION_MASK_ISA_RDSEED },
    { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
    { "-madx", OPTION_MASK_ISA_ADX },
    { "-mtbm", OPTION_MASK_ISA_TBM },
    { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
    { "-mmovbe", OPTION_MASK_ISA_MOVBE },
    { "-mcrc32", OPTION_MASK_ISA_CRC32 },
    { "-maes", OPTION_MASK_ISA_AES },
    { "-msha", OPTION_MASK_ISA_SHA },
    { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd", OPTION_MASK_ISA_RDRND },
    { "-mf16c", OPTION_MASK_ISA_F16C },
    { "-mrtm", OPTION_MASK_ISA_RTM },
    { "-mxsave", OPTION_MASK_ISA_XSAVE },
    { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
    { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
    { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
    { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
    { "-mxsaves", OPTION_MASK_ISA_XSAVES },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
    { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
    { "-m80387", MASK_80387 },
    { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double", MASK_ALIGN_DOUBLE },
    { "-mcld", MASK_CLD },
    { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
    { "-mieee-fp", MASK_IEEE_FP },
    { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args", MASK_NO_PUSH_ARGS },
    { "-mno-red-zone", MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip", MASK_RECIP },
    { "-mrtd", MASK_RTD },
    { "-msseregparm", MASK_SSEREGPARM },
    { "-mstack-arg-probe", MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
    { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
    { "-mvzeroupper", MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128", MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
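/* Editor's note (illustration only, names below are just an example):
   a call such as

     ix86_target_string (ix86_isa_flags, target_flags, "haswell",
			 "generic", FPMATH_SSE, true)

   returns a heap-allocated string along the lines of
   "-march=haswell -mtune=generic -m64 -mfpmath=sse", with the exact
   contents depending on which ISA and flag bits are set.  */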
/* Return true if profiling code should be emitted before the
   prologue, and false otherwise.
   Note: for x86 the "hotfix" case is sorried, i.e. rejected with
   sorry ().  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
static const char *stringop_alg_names[] = {
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ALG
};

/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
   The string is of the following form (or comma separated list of it):

     strategy_alg:max_size:[align|noalign]

   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], in which min_size is the max_size + 1 of the
   preceding range.  The last size range must have max_size == -1.

   Examples:

    -mmemcpy-strategy=libcall:-1:noalign

    this is equivalent to (for known size memcpy) -mstringop-strategy=libcall


    -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   This is to tell the compiler to use the following strategy for memset
   1) when the expected size is between [1, 16], use rep_8byte strategy;
   2) when the size is between [17, 2048], use vector_loop;
   3) when the size is > 2048, use libcall.  */
struct stringop_size_range
{
  int max;
  stringop_alg alg;
  bool noalign;
};

static void
ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
{
  const struct stringop_algs *default_algs;
  stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
  char *curr_range_str, *next_range_str;
  int i;
  int n = 0;

  if (is_memset)
    default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
  else
    default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

  curr_range_str = strategy_str;

  do
    {
      int maxs;
      char alg_name[128];
      char align[16];

      next_range_str = strchr (curr_range_str, ',');
      if (next_range_str)
	*next_range_str++ = '\0';

      if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
		       alg_name, &maxs, align))
	{
	  error ("wrong arg %s to option %s", curr_range_str,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
	{
	  error ("size ranges of option %s should be increasing",
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      for (i = 0; i < last_alg; i++)
	if (!strcmp (alg_name, stringop_alg_names[i]))
	  break;

      if (i == last_alg)
	{
	  error ("wrong stringop strategy name %s specified for option %s",
		 alg_name,
		 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}

      input_ranges[n].max = maxs;
      input_ranges[n].alg = (stringop_alg) i;
      if (!strcmp (align, "align"))
	input_ranges[n].noalign = false;
      else if (!strcmp (align, "noalign"))
	input_ranges[n].noalign = true;
      else
	{
	  error ("unknown alignment %s specified for option %s",
		 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
	  return;
	}
      n++;
      curr_range_str = next_range_str;
    }
  while (curr_range_str);

  if (input_ranges[n - 1].max != -1)
    {
      error ("the max value for the last size range should be -1"
	     " for option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  if (n > MAX_STRINGOP_ALGS)
    {
      error ("too many size ranges specified in option %s",
	     is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
      return;
    }

  /* Now override the default algs array.  */
  for (i = 0; i < n; i++)
    {
      *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
      *const_cast<stringop_alg *>(&default_algs->size[i].alg)
	= input_ranges[i].alg;
      *const_cast<int *>(&default_algs->size[i].noalign)
	= input_ranges[i].noalign;
    }
}
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */

static void
parse_mtune_ctrl_str (bool dump)
{
  if (!ix86_tune_ctrl_string)
    return;

  unsigned int i;
  char *next_feature_string = NULL;
  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
  char *orig = curr_feature_string;
  bool clear = false;
  do
    {
      next_feature_string = strchr (curr_feature_string, ',');
      if (next_feature_string)
	*next_feature_string++ = '\0';
      if (*curr_feature_string == '^')
	{
	  curr_feature_string++;
	  clear = true;
	}
      else
	clear = false;

      for (i = 0; i < X86_TUNE_LAST; i++)
	{
	  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
	    {
	      ix86_tune_features[i] = !clear;
	      if (dump)
		fprintf (stderr, "Explicitly %s feature %s\n",
			 clear ? "clear" : "set", ix86_tune_feature_names[i]);
	      break;
	    }
	}
      if (i == X86_TUNE_LAST)
	error ("Unknown parameter to option -mtune-ctrl: %s",
	       clear ? curr_feature_string - 1 : curr_feature_string);
      curr_feature_string = next_feature_string;
    }
  while (curr_feature_string);
  free (orig);
}
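/* Editor's illustration (not part of the original source): given the
   parser above, an option such as

     -mtune-ctrl=feature_a,^feature_b

   sets feature_a and, because of the '^' prefix, clears feature_b.
   The valid feature names come from x86-tune.def via
   ix86_tune_feature_names; "feature_a" and "feature_b" here are just
   hypothetical placeholders for names of that form.  */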
/* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor type.  */

static void
set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    {
      if (ix86_tune_no_default)
	ix86_tune_features[i] = 0;
      else
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }

  if (dump)
    {
      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
      for (i = 0; i < X86_TUNE_LAST; i++)
	fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
		 ix86_tune_features[i] ? "on" : "off");
    }

  parse_mtune_ctrl_str (dump);
}

/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p,
			       struct gcc_options *opts,
			       struct gcc_options *opts_set)
{
  int i;
  unsigned int ix86_arch_mask;
  const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
#define PTA_AVX512F		(HOST_WIDE_INT_1 << 40)
#define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
#define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
#define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
#define PTA_SHA			(HOST_WIDE_INT_1 << 45)
#define PTA_PREFETCHWT1		(HOST_WIDE_INT_1 << 46)
#define PTA_CLFLUSHOPT		(HOST_WIDE_INT_1 << 47)
#define PTA_XSAVEC		(HOST_WIDE_INT_1 << 48)
#define PTA_XSAVES		(HOST_WIDE_INT_1 << 49)
#define PTA_AVX512DQ		(HOST_WIDE_INT_1 << 50)
#define PTA_AVX512BW		(HOST_WIDE_INT_1 << 51)
#define PTA_AVX512VL		(HOST_WIDE_INT_1 << 52)
#define PTA_CORE2 \
  (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
   | PTA_CX16 | PTA_FXSR)
#define PTA_NEHALEM \
  (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
#define PTA_WESTMERE \
  (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
#define PTA_SANDYBRIDGE \
  (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
#define PTA_IVYBRIDGE \
  (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
#define PTA_HASWELL \
  (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
   | PTA_FMA | PTA_MOVBE | PTA_HLE)
#define PTA_BROADWELL \
  (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
#define PTA_BONNELL \
  (PTA_CORE2 | PTA_MOVBE)
#define PTA_SILVERMONT \
  (PTA_WESTMERE | PTA_MOVBE)
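/* Editor's sketch (not part of the original source): during option
   override, the PTA_* bits of the selected processor_alias_table entry
   are translated into OPTION_MASK_ISA_* flags, unless the user already
   set the corresponding option explicitly.  The fragment below shows
   that general pattern for a single flag; the exact field names are an
   assumption based on that pattern.  */
#if 0
      if (processor_alias_table[i].flags & PTA_SSE2
	  && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
#endif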
3153 /* if this reaches 64, need to widen struct pta flags below */
3157 const char *const name
; /* processor name or nickname. */
3158 const enum processor_type processor
;
3159 const enum attr_cpu schedule
;
3160 const unsigned HOST_WIDE_INT flags
;
3162 const processor_alias_table
[] =
3164 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3165 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3166 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3167 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3169 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3170 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3171 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3172 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3173 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3174 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3177 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3178 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3180 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3182 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3183 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3184 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3185 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3186 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3187 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3188 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3189 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3190 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3191 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3192 {"core2", PROCESSOR_CORE2
, CPU_CORE2
, PTA_CORE2
},
3193 {"nehalem", PROCESSOR_NEHALEM
, CPU_NEHALEM
, PTA_NEHALEM
},
3194 {"corei7", PROCESSOR_NEHALEM
, CPU_NEHALEM
, PTA_NEHALEM
},
3195 {"westmere", PROCESSOR_NEHALEM
, CPU_NEHALEM
, PTA_WESTMERE
},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE
, CPU_NEHALEM
,
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE
, CPU_NEHALEM
,
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE
, CPU_NEHALEM
,
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE
, CPU_NEHALEM
,
3204 {"haswell", PROCESSOR_HASWELL
, CPU_NEHALEM
, PTA_HASWELL
},
3205 {"core-avx2", PROCESSOR_HASWELL
, CPU_NEHALEM
, PTA_HASWELL
},
3206 {"broadwell", PROCESSOR_HASWELL
, CPU_NEHALEM
, PTA_BROADWELL
},
3207 {"bonnell", PROCESSOR_BONNELL
, CPU_ATOM
, PTA_BONNELL
},
3208 {"atom", PROCESSOR_BONNELL
, CPU_ATOM
, PTA_BONNELL
},
3209 {"silvermont", PROCESSOR_SILVERMONT
, CPU_SLM
, PTA_SILVERMONT
},
3210 {"slm", PROCESSOR_SILVERMONT
, CPU_SLM
, PTA_SILVERMONT
},
3211 {"intel", PROCESSOR_INTEL
, CPU_SLM
, PTA_NEHALEM
},
3212 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3213 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3214 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3215 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3216 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3217 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3218 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3219 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3220 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3221 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3222 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3223 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3224 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3225 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3226 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3227 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3228 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3229 {"k8", PROCESSOR_K8
, CPU_K8
,
3230 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3231 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3232 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3233 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3234 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3235 {"opteron", PROCESSOR_K8
, CPU_K8
,
3236 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3237 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3238 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3239 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3240 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3241 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3242 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3243 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3244 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3245 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3246 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3247 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3248 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3249 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3250 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3251 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3252 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3253 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3254 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3255 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3256 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3257 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3258 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3259 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3260 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3261 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3262 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3263 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3264 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3265 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3266 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3267 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3268 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3269 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3270 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3271 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3272 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3273 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3274 {"bdver4", PROCESSOR_BDVER4
, CPU_BDVER4
,
3275 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3276 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3277 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_AVX2
3278 | PTA_FMA4
| PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_BMI2
3279 | PTA_TBM
| PTA_F16C
| PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
3280 | PTA_XSAVE
| PTA_XSAVEOPT
| PTA_FSGSBASE
| PTA_RDRND
3282 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3283 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3284 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3285 | PTA_FXSR
| PTA_XSAVE
},
3286 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3287 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3288 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3289 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3290 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3291 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3293 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3295 | PTA_HLE
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;           /* option name */
      unsigned int mask;            /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };
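  /* The -mrecip= parser near the end of this function matches each
     comma-separated token against this table; for example,
     -mrecip=all,!sqrt enables every reciprocal approximation except the
     scalar square root, since a leading '!' inverts the named mask.  */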
  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
      if (TARGET_LP64_P (opts->x_ix86_isa_flags)
	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif
  if (TARGET_X32_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
				| OPTION_MASK_ABI_X32
				| OPTION_MASK_ABI_64);
  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }
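  /* Net effect of the ABI fixups above: -m64 selects LP64 (ISA_64BIT set,
     ABI_X32 clear), -mx32 selects ILP32 on the 64-bit ISA (ISA_64BIT set,
     ABI_64 clear), and -m16 clears ISA_64BIT together with both ABI
     masks.  */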
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (opts->x_ix86_tune_string)
    {
      /* As special support for cross compilers we read -mtune=native
	 as -mtune=generic.  With native compilers we won't see the
	 -mtune=native, as it was changed by the driver.  */
      if (!strcmp (opts->x_ix86_tune_string, "native"))
	{
	  opts->x_ix86_tune_string = "generic";
	}
      else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (opts->x_ix86_arch_string)
	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
      if (!opts->x_ix86_tune_string)
	{
	  opts->x_ix86_tune_string
	    = processor_target_table[TARGET_CPU_DEFAULT].name;
	  ix86_tune_defaulted = 1;
	}

      /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
	 or defaulted.  We need to use a sensible tune option.  */
      if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
	{
	  opts->x_ix86_tune_string = "generic";
	}
    }
  if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
      && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      opts->x_ix86_stringop_alg = no_stringop;
    }
  if (!opts->x_ix86_arch_string)
    opts->x_ix86_arch_string
      = TARGET_64BIT_P (opts->x_ix86_isa_flags)
	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (opts_set->x_ix86_pmode)
    {
      if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
	   && opts->x_ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && opts->x_ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
    }
  else
    opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
			 ? PMODE_DI : PMODE_SI;
  if (!opts_set->x_ix86_abi)
    opts->x_ix86_abi = DEFAULT_ABI;
  /* For targets using the ms ABI enable ms-extensions, if not
     explicitly turned off.  For non-ms ABIs we turn off this
     option.  */
  if (!opts_set->x_flag_ms_extensions)
    opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
  if (opts_set->x_ix86_cmodel)
    {
      switch (opts->x_ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (opts->x_flag_pic)
	    opts->x_ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (opts->x_flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (opts->x_flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      opts->x_ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	  && (TARGET_RDOS || TARGET_PECOFF))
	opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
      else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	opts->x_ix86_cmodel = CM_32;
    }
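  /* The code models validated above follow the x86-64 psABI: "small"
     assumes code and statically allocated data fit in the low 2GB,
     "kernel" places them in the negative 2GB, and "medium"/"large"
     progressively relax the limits on data and code at some cost in
     code size.  */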
  if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      opts->x_ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
      != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
	    && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_SHA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & PTA_AVX512F
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
	if (processor_alias_table[i].flags & PTA_AVX512ER
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
	if (processor_alias_table[i].flags & PTA_AVX512PF
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
	if (processor_alias_table[i].flags & PTA_AVX512CD
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
	if (processor_alias_table[i].flags & PTA_XSAVEC
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
	if (processor_alias_table[i].flags & PTA_XSAVES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
	if (processor_alias_table[i].flags & PTA_AVX512DQ
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
	if (processor_alias_table[i].flags & PTA_AVX512BW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
	if (processor_alias_table[i].flags & PTA_AVX512VL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
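  /* Note the pattern used for every ISA bit above: a PTA_* feature of
     the selected -march CPU turns on the matching OPTION_MASK_ISA_* flag
     only when the user has not set that flag explicitly, so an explicit
     -mno-<isa> always wins over the CPU default.  */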
  if (!strcmp (opts->x_ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strcmp (opts->x_ix86_arch_string, "intel"))
    error ("intel CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   opts->x_ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    opts->x_ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (opts->x_ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  /* Intel CPUs have always interpreted SSE prefetch instructions as
	     NOPs; so, we can enable SSE prefetch instructions even when
	     -mtune (rather than -march) points us to a processor that has them.
	     However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	     higher processors.  */
	  if (TARGET_CMOV
	      && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	    x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   opts->x_ix86_tune_string, prefix, suffix, sw);
  set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (opts->x_flag_asynchronous_unwind_tables
	  && !opts_set->x_flag_unwind_tables
	  && TARGET_64BIT_MS_ABI)
	opts->x_flag_unwind_tables = 1;
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_unwind_tables
	  = opts->x_flag_asynchronous_unwind_tables = 1;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = 0;
    }
  else
    {
      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
	opts->x_flag_omit_frame_pointer
	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
      if (opts->x_flag_asynchronous_unwind_tables == 2)
	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (opts->x_flag_pcc_struct_return == 2)
	opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (opts_set->x_ix86_regparm)
    {
      if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (opts->x_ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 opts->x_ix86_regparm, REGPARM_MAX);
	  opts->x_ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (opts->x_align_loops == 0)
    {
      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (opts->x_align_jumps == 0)
    {
      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (opts->x_align_functions == 0)
    {
      opts->x_align_functions = processor_target_table[ix86_tune].align_func;
    }
  /* Provide default for -mbranch-cost= value.  */
  if (!opts_set->x_ix86_branch_cost)
    opts->x_ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT)
	      & ~opts->x_ix86_isa_flags_explicit);

      if (TARGET_RTD_P (opts->x_target_flags))
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      opts->x_target_flags
	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;

      if (!ix86_arch_specified)
	opts->x_ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
	opts->x_target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (opts->x_flag_omit_frame_pointer)
    opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
    opts->x_flag_omit_frame_pointer = 1;
  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (opts->x_flag_finite_math_only)
    opts->x_target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (ix86_tune_features[X86_TUNE_ALWAYS_FANCY_MATH_387])
    opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387_P (opts->x_target_flags))
    opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
  /* Enable SSE prefetch.  */
  if (TARGET_SSE_P (opts->x_ix86_isa_flags)
      || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
    x86_prefetch_sse = true;
  /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1.  */
  if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
      || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
  /* Enable popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
      || TARGET_ABM_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
  /* Enable lzcnt instruction for -mabm.  */
  if (TARGET_ABM_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags
      |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (opts_set->x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
		 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (opts->x_ix86_preferred_stack_boundary_arg < min
	  || opts->x_ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   opts->x_ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
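  /* The argument is an exponent over bytes: -mpreferred-stack-boundary=4
     yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the usual 16-byte
     stack alignment of the x86-64 psABI.  */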
  /* Set the default value for -mstackrealign.  */
  if (opts->x_ix86_force_align_arg_pointer == -1)
    opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (opts_set->x_ix86_incoming_stack_boundary_arg)
    {
      if (opts->x_ix86_incoming_stack_boundary_arg
	  < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       opts->x_ix86_incoming_stack_boundary_arg,
	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
#ifndef NO_PROFILE_COUNTERS
  if (flag_nop_mcount)
    error ("-mnop-mcount is not compatible with this target");
#endif
  if (flag_nop_mcount && flag_pic)
    error ("-mnop-mcount is not implemented for -fPIC");
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM_P (opts->x_target_flags)
      && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (opts_set->x_ix86_fpmath)
    {
      if (opts->x_ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      opts->x_ix86_fpmath = FPMATH_387;
	    }
	  else if ((opts->x_ix86_fpmath & FPMATH_387)
		   && !TARGET_80387_P (opts->x_target_flags))
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      opts->x_ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  /* For all chips supporting SSE2, -mfpmath=sse performs better than
     fpmath=387.  The second is however default at many targets since the
     extra 80bit precision of temporaries is considered to be part of ABI.
     Overwrite the default at least for -ffast-math.
     TODO: -mfpmath=both seems to produce similarly performing code with
     slightly smaller binaries.  It is however not clear if register
     allocation is ready for this setting.
     Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
     codegen.  We may switch to 387 with -ffast-math for size optimized
     functions.  */
  else if (fast_math_flags_set_p (&global_options)
	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
    opts->x_ix86_fpmath = FPMATH_SSE;
  else
    opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387_P (opts->x_target_flags))
    opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (opts_set->x_ix86_veclibabi_type)
    switch (opts->x_ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if (ix86_tune_features[X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
      && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !opts->x_optimize_size)
    opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE_P (opts->x_target_flags)
      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);
  /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful.  */
  if (opts->x_flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    opts->x_flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    ix86_gen_leave = gen_leave;
  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
      ix86_gen_monitor = gen_sse3_monitor_di;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
      ix86_gen_monitor = gen_sse3_monitor_si;
    }
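  /* Caching the Pmode-specific generator functions above lets the
     prologue, stack-probe and TLS expanders call through a single
     function pointer instead of re-testing Pmode at every expansion
     site.  */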
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
#endif
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
    {
      if (opts->x_flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      opts->x_flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (opts->x_flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      opts->x_flag_fentry = 1;
    }
  else if (opts->x_flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      opts->x_flag_fentry = 1;
#else
      opts->x_flag_fentry = 0;
#endif
    }
  /* When not optimizing for size, enable the vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (!opts->x_optimize_size)
    {
      if (flag_expensive_optimizations
	  && !(opts_set->x_target_flags & MASK_VZEROUPPER))
	opts->x_target_flags |= MASK_VZEROUPPER;
      if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
	  && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
      if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
	  && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
      /* Enable 128-bit AVX instruction generation
	 for the auto-vectorizer.  */
      if (TARGET_AVX128_OPTIMAL
	  && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
	opts->x_target_flags |= MASK_PREFER_AVX128;
    }
  if (opts->x_ix86_recip_name)
    {
      char *p = ASTRDUP (opts->x_ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  opts->x_recip_mask_explicit |= mask;
	  if (invert)
	    opts->x_recip_mask &= ~mask;
	  else
	    opts->x_recip_mask |= mask;
	}
    }
  if (TARGET_RECIP_P (opts->x_target_flags))
    opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
  else if (opts_set->x_target_flags & MASK_RECIP)
    opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
  /* Default long double to 64-bit for 32-bit Bionic and to __float128
     for 64-bit Bionic.  */
  if (TARGET_HAS_BIONIC
      && !(opts_set->x_target_flags
	   & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
    opts->x_target_flags |= (TARGET_64BIT
			     ? MASK_LONG_DOUBLE_128
			     : MASK_LONG_DOUBLE_64);
  /* Only one of them can be active.  */
  gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
	      || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node (opts);
  /* Handle stack protector */
  if (!opts_set->x_ix86_stack_protector_guard)
    opts->x_ix86_stack_protector_guard
      = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
  /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
  if (opts->x_ix86_tune_memcpy_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
      ix86_parse_stringop_strategy_string (str, false);
      free (str);
    }

  if (opts->x_ix86_tune_memset_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
      ix86_parse_stringop_strategy_string (str, true);
      free (str);
    }
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  static struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
	1, PASS_POS_INSERT_AFTER
      };

  ix86_option_override_internal (true, &global_options, &global_options_set);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
	    : TARGET_64BIT ? (1 << 2)
	    : (1 << 1));
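  /* c_mask picks the column of the CALL_USED_REGISTERS initializer that
     applies to the current ABI: bit 3 for the 64-bit MS ABI, bit 2 for
     the 64-bit SysV ABI, bit 1 for 32-bit targets.  */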
  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If AVX512F is disabled, squash the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }
}
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr,
			     struct gcc_options *opts)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
  ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
  ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
  ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
  ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
  ptr->x_ix86_abi = opts->x_ix86_abi;
  ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
  ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
  ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
  ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
  ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
  ptr->x_ix86_incoming_stack_boundary_arg
    = opts->x_ix86_incoming_stack_boundary_arg;
  ptr->x_ix86_pmode = opts->x_ix86_pmode;
  ptr->x_ix86_preferred_stack_boundary_arg
    = opts->x_ix86_preferred_stack_boundary_arg;
  ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
  ptr->x_ix86_regparm = opts->x_ix86_regparm;
  ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
  ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
  ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
  ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
  ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
  ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
  ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
  ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
  ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
  ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct gcc_options *opts,
				struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  /* We don't change -fPIC.  */
  opts->x_flag_pic = flag_pic;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  opts->x_ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
  opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
  opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
  opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
  opts->x_ix86_abi = ptr->x_ix86_abi;
  opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
  opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
  opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
  opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
  opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
  opts->x_ix86_incoming_stack_boundary_arg
    = ptr->x_ix86_incoming_stack_boundary_arg;
  opts->x_ix86_pmode = ptr->x_ix86_pmode;
  opts->x_ix86_preferred_stack_boundary_arg
    = ptr->x_ix86_preferred_stack_boundary_arg;
  opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
  opts->x_ix86_regparm = ptr->x_ix86_regparm;
  opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
  opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
  opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
  opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
  opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
  opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
  opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
  opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
  opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
  opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  gcc_assert (ptr->arch < PROCESSOR_max);
  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch, processor_target_table[ptr->arch].name);

  gcc_assert (ptr->tune < PROCESSOR_max);
  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune, processor_target_table[ptr->tune].name);

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *opts,
				     struct gcc_options *opts_set,
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;
#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }
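/* Each macro expands to one initializer for the attrs[] table below; for
   instance, IX86_ATTR_ISA ("avx", OPT_mavx) becomes
   { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }, 3 being sizeof ("avx") - 1.  */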
  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of string */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* what option to set */
    int mask;			/* what mask to apply */
  }
  attrs[] = {
    /* isa options */
4557 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4558 IX86_ATTR_ISA ("abm", OPT_mabm
),
4559 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4560 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4561 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4562 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4563 IX86_ATTR_ISA ("aes", OPT_maes
),
4564 IX86_ATTR_ISA ("sha", OPT_msha
),
4565 IX86_ATTR_ISA ("avx", OPT_mavx
),
4566 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4567 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4568 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4569 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4570 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4571 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq
),
4572 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw
),
4573 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl
),
4574 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4575 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4576 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4577 IX86_ATTR_ISA ("sse", OPT_msse
),
4578 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4579 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4580 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4581 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4582 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4583 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4584 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4585 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4586 IX86_ATTR_ISA ("fma", OPT_mfma
),
4587 IX86_ATTR_ISA ("xop", OPT_mxop
),
4588 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4589 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4590 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4591 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4592 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4593 IX86_ATTR_ISA ("hle", OPT_mhle
),
4594 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4595 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4596 IX86_ATTR_ISA ("adx", OPT_madx
),
4597 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4598 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4599 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4600 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1
),
4601 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt
),
4602 IX86_ATTR_ISA ("xsavec", OPT_mxsavec
),
4603 IX86_ATTR_ISA ("xsaves", OPT_mxsaves
),
    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
4613 IX86_ATTR_YES ("cld",
4617 IX86_ATTR_NO ("fancy-math-387",
4618 OPT_mfancy_math_387
,
4619 MASK_NO_FANCY_MATH_387
),
4621 IX86_ATTR_YES ("ieee-fp",
4625 IX86_ATTR_YES ("inline-all-stringops",
4626 OPT_minline_all_stringops
,
4627 MASK_INLINE_ALL_STRINGOPS
),
4629 IX86_ATTR_YES ("inline-stringops-dynamically",
4630 OPT_minline_stringops_dynamically
,
4631 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4633 IX86_ATTR_NO ("align-stringops",
4634 OPT_mno_align_stringops
,
4635 MASK_NO_ALIGN_STRINGOPS
),
4637 IX86_ATTR_YES ("recip",
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, opts, opts_set,
						     enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (opts, opts_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    opts->x_target_flags |= mask;
	  else
	    opts->x_target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (opts, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
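/* A minimal standalone sketch (hypothetical, not GCC code) of the scanning
   loop above: split the attribute string on commas and strip an optional
   "no-" prefix before looking the token up in the attrs table.  */
#if 0
static void
sketch_scan_target_string (char *s)
{
  while (s && *s != '\0')
    {
      char *comma = strchr (s, ',');
      if (comma)
	*comma = '\0';
      int set_p = (strncmp (s, "no-", 3) != 0);
      if (!set_p)
	s += 3;		/* "no-avx" selects the "avx" entry, clearing it */
      /* ...look up S in attrs[] and apply or clear its mask...  */
      s = comma ? comma + 1 : NULL;
    }
}
#endif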
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

static tree
ix86_valid_target_attribute_tree (tree args,
				  struct gcc_options *opts,
				  struct gcc_options *opts_set)
{
  const char *orig_arch_string = opts->x_ix86_arch_string;
  const char *orig_tune_string = opts->x_ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
					     opts_set, &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
      || opts->x_target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	opts->x_ix86_arch_string
	  = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	opts->x_ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	opts->x_ix86_tune_string
	  = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	opts->x_ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
	       && TARGET_SSE_P (opts->x_ix86_isa_flags))
	{
	  opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false, opts, opts_set);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (opts->x_ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node (opts);

      opts->x_ix86_arch_string = orig_arch_string;
      opts->x_ix86_tune_string = orig_tune_string;
      opts_set->x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct gcc_options func_options;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node (&global_options);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  new_target = ix86_valid_target_attribute_tree (args, &func_options,
						 &global_options_set);

  new_optimize = build_optimization_node (&func_options);

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  return ret;
}
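/* For illustration only (hypothetical, requires function multi-versioning
   support): "default" is accepted by the hook above without setting any
   target options.  */
#if 0
__attribute__((target ("default"))) int hypothetical_foo (void) { return 0; }
__attribute__((target ("avx2")))    int hypothetical_foo (void) { return 1; }
#endif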
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
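/* For illustration only (hypothetical): the subset rule above permits
   inlining upward in ISA capability but not downward.  */
#if 0
__attribute__((target ("sse2")))   static int callee (int x) { return x + 1; }
__attribute__((target ("sse4.2"))) int caller (int x)
{
  return callee (x);	/* may be inlined: sse2 flags are a subset of sse4.2 */
}
#endif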
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Invalidate ix86_previous_fndecl cache.  */
void
ix86_reset_previous_fndecl (void)
{
  ix86_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}

      else if (old_tree)
	{
	  new_tree = target_option_current_node;
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else if (new_tree == target_option_default_node)
	    restore_target_globals (&default_target_globals);
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  Also,
	 int_size_in_bytes returns -1 if size can vary or is larger than
	 an integer, in which case it is also safer to assume that it goes
	 in large data.  */
      if (size <= 0 || size > ix86_section_threshold)
	return true;
    }

  return false;
}
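/* For illustration only (hypothetical): with -mcmodel=medium, a variable
   larger than the -mlarge-data-threshold value (ix86_section_threshold) is
   classified as large data/bss by the predicate above.  */
#if 0
static char big_buffer[1 << 20];	/* placed in .lbss, not .bss */
#endif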
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", 5) == 0
      || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
    flags |= SECTION_BSS;

  return flags;
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
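/* For illustration only (hypothetical): a call in tail position that this
   hook may approve, letting the compiler emit a jmp instead of call/ret.  */
#if 0
extern int helper (int);
int wrapper (int x) { return helper (x + 1); }	/* sibcall candidate */
#endif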
5381 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5382 and "sseregparm" calling convention attributes;
5383 arguments as in struct attribute_spec.handler. */
5386 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5391 if (TREE_CODE (*node
) != FUNCTION_TYPE
5392 && TREE_CODE (*node
) != METHOD_TYPE
5393 && TREE_CODE (*node
) != FIELD_DECL
5394 && TREE_CODE (*node
) != TYPE_DECL
)
5396 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5398 *no_add_attrs
= true;
5402 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5403 if (is_attribute_p ("regparm", name
))
5407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5409 error ("fastcall and regparm attributes are not compatible");
5412 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5414 error ("regparam and thiscall attributes are not compatible");
5417 cst
= TREE_VALUE (args
);
5418 if (TREE_CODE (cst
) != INTEGER_CST
)
5420 warning (OPT_Wattributes
,
5421 "%qE attribute requires an integer constant argument",
5423 *no_add_attrs
= true;
5425 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5427 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5429 *no_add_attrs
= true;
5437 /* Do not warn when emulating the MS ABI. */
5438 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5439 && TREE_CODE (*node
) != METHOD_TYPE
)
5440 || ix86_function_type_abi (*node
) != MS_ABI
)
5441 warning (OPT_Wattributes
, "%qE attribute ignored",
5443 *no_add_attrs
= true;
5447 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5448 if (is_attribute_p ("fastcall", name
))
5450 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5452 error ("fastcall and cdecl attributes are not compatible");
5454 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5456 error ("fastcall and stdcall attributes are not compatible");
5458 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5460 error ("fastcall and regparm attributes are not compatible");
5462 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5464 error ("fastcall and thiscall attributes are not compatible");
5468 /* Can combine stdcall with fastcall (redundant), regparm and
5470 else if (is_attribute_p ("stdcall", name
))
5472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5474 error ("stdcall and cdecl attributes are not compatible");
5476 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5478 error ("stdcall and fastcall attributes are not compatible");
5480 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5482 error ("stdcall and thiscall attributes are not compatible");
5486 /* Can combine cdecl with regparm and sseregparm. */
5487 else if (is_attribute_p ("cdecl", name
))
5489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5491 error ("stdcall and cdecl attributes are not compatible");
5493 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5495 error ("fastcall and cdecl attributes are not compatible");
5497 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5499 error ("cdecl and thiscall attributes are not compatible");
5502 else if (is_attribute_p ("thiscall", name
))
5504 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5505 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5509 error ("stdcall and thiscall attributes are not compatible");
5511 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5513 error ("fastcall and thiscall attributes are not compatible");
5515 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5517 error ("cdecl and thiscall attributes are not compatible");
5521 /* Can combine sseregparm with all attributes. */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
				  int flags, bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
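/* For illustration only (hypothetical, 32-bit): the source-level attributes
   decoded above.  On 64-bit targets everything maps to IX86_CALLCVT_CDECL.  */
#if 0
int __attribute__((stdcall))  s (int a, int b);	/* callee pops arguments */
int __attribute__((fastcall)) f (int a, int b);	/* a in %ecx, b in %edx */
#endif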
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      && opt_for_fn (decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = AX_REG; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
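/* For illustration only (hypothetical): an explicit regparm value overrides
   the local-function heuristics above; regparm(3) passes the first three
   integer arguments in %eax, %edx and %ecx.  */
#if 0
int __attribute__((regparm (3))) add3 (int a, int b, int c);
#endif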
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
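/* For illustration only (hypothetical): for a non-variadic stdcall function
   taking two ints, SIZE is 8 here and the callee returns with "ret $8".  */
#if 0
int __attribute__((stdcall)) g (int a, int b);
#endif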
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
    {
      int i;

      extract_insn (insn);
      preprocess_constraints (insn);

      int n_operands = recog_data.n_operands;
      int n_alternatives = recog_data.n_alternatives;
      for (i = 0; i < n_operands; i++)
	{
	  rtx op = recog_data.operand[i];
	  enum machine_mode mode = GET_MODE (op);
	  const operand_alternative *op_alt;
	  int offset = 0;
	  bool win;
	  int j;

	  /* For pre-AVX disallow unaligned loads/stores where the
	     instructions don't support it.  */
	  if (!TARGET_AVX
	      && VECTOR_MODE_P (GET_MODE (op))
	      && misaligned_operand (op, GET_MODE (op)))
	    {
	      int min_align = get_attr_ssememalign (insn);
	      if (min_align == 0)
		return false;
	    }

	  /* A unary operator may be accepted by the predicate, but it
	     is irrelevant for matching constraints.  */
	  if (UNARY_P (op))
	    op = XEXP (op, 0);

	  if (GET_CODE (op) == SUBREG)
	    {
	      if (REG_P (SUBREG_REG (op))
		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					      GET_MODE (SUBREG_REG (op)),
					      SUBREG_BYTE (op),
					      GET_MODE (op));
	      op = SUBREG_REG (op);
	    }

	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
	    continue;

	  op_alt = recog_op_alt;

	  /* Operand has no constraints, anything is OK.  */
	  win = !n_alternatives;

	  alternative_mask enabled = recog_data.enabled_alternatives;
	  for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	    {
	      if (!TEST_BIT (enabled, j))
		continue;
	      if (op_alt[i].anything_ok
		  || (op_alt[i].matches != -1
		      && operands_match_p
			  (recog_data.operand[i],
			   recog_data.operand[op_alt[i].matches]))
		  || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
		{
		  win = true;
		  break;
		}
	    }

	  if (!win)
	    return false;
	}
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
				     : HOST_WIDE_INT_C (0x7fff8000))
		     : (HOST_WIDE_INT_1 << 29);
}
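/* For illustration only (hypothetical sketch, not GCC code): AddressSanitizer
   locates the shadow byte for an address as (addr >> 3) + offset, using the
   offset returned above; each shadow byte covers 8 application bytes.  */
#if 0
static inline unsigned long long
sketch_asan_shadow (unsigned long long addr, unsigned long long offset)
{
  return (addr >> 3) + offset;
}
#endif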
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
    return true;

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class);
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      i = cgraph_node::local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     minimizing preparation time.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If INT_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 64 && !TARGET_AVX512F)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
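/* Worked example (illustrative): for struct { int i; float f; } both fields
   share one 8-byte word, so rule #4 applies:
   merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) yields
   X86_64_INTEGERSI_CLASS and the word travels in an integer register.  */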
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 512);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num && i < words; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

	/* Analyze last 128 bits only.  */
	size = (size - 1) & 0x7f;

	if (size < 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size < 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size < 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size < 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V8DFmode:
    case V16SFmode:
    case V8DImode:
    case V16SImode:
    case V32HImode:
    case V64QImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
/* Examine the argument and return set number of register required in each
   class.  Return true iff parameter should be passed in memory.  */

static bool
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}
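/* Worked example (illustrative): struct { long l; double d; } classifies as
   { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS }, so examine_argument sets
   *int_nregs = 1 and *sse_nregs = 1 and returns false: the struct is passed
   in one integer and one SSE register rather than in memory.  */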
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
                     const_tree type, int in_return, int nintregs, int nsseregs,
                     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n, i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
                        &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
        {
          if (!issued_sse_ret_error)
            {
              error ("SSE register return with SSE disabled");
              issued_sse_ret_error = true;
            }
        }
      else if (!issued_sse_arg_error)
        {
          error ("SSE register argument with SSE disabled");
          issued_sse_arg_error = true;
        }
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
          || regclass[i] == X86_64_X87UP_CLASS
          || regclass[i] == X86_64_COMPLEX_X87_CLASS)
        {
          if (!issued_x87_ret_error)
            {
              error ("x87 register return with x87 disabled");
              issued_x87_ret_error = true;
            }
          return NULL;
        }

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        if (mode != BLKmode)
          return gen_reg_or_parallel (mode, orig_mode,
                                      SSE_REGNO (sse_regno));
        break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      }

  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
                                SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
                                SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
                                SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
        {
        case X86_64_NO_CLASS:
          break;
        case X86_64_INTEGER_CLASS:
        case X86_64_INTEGERSI_CLASS:
          /* Merge TImodes on aligned occasions here too.  */
          if (i * 8 + 8 > bytes)
            tmpmode
              = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
          else if (regclass[i] == X86_64_INTEGERSI_CLASS)
            tmpmode = SImode;
          else
            tmpmode = DImode;
          /* We've requested e.g. 24 bytes, for which there is
             no mode.  Use DImode.  */
          if (tmpmode == BLKmode)
            tmpmode = DImode;
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (tmpmode, *intreg),
                                 GEN_INT (i * 8));
          intreg++;
          break;
        case X86_64_SSESF_CLASS:
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (SFmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (i * 8));
          sse_regno++;
          break;
        case X86_64_SSEDF_CLASS:
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (DFmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (i * 8));
          sse_regno++;
          break;
        case X86_64_SSE_CLASS:
          pos = i;
          if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
            {
              tmpmode = TImode;
              i++;
            }
          gcc_assert (i == 0
                      && regclass[1] == X86_64_SSEUP_CLASS
                      && regclass[2] == X86_64_SSEUP_CLASS
                      && regclass[3] == X86_64_SSEUP_CLASS);
          gcc_assert (i == 0
                      && regclass[1] == X86_64_SSEUP_CLASS
                      && regclass[2] == X86_64_SSEUP_CLASS
                      && regclass[3] == X86_64_SSEUP_CLASS
                      && regclass[4] == X86_64_SSEUP_CLASS
                      && regclass[5] == X86_64_SSEUP_CLASS
                      && regclass[6] == X86_64_SSEUP_CLASS
                      && regclass[7] == X86_64_SSEUP_CLASS);
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (tmpmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (pos * 8));
          sse_regno++;
          break;
        }
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];

  return ret;
}
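/* Illustrative sketch (added): for struct s { long a; double b; } the
   classification is { INTEGER, SSEDF }, so the container built above has
   the shape

       (parallel [(expr_list (reg:DI di)   (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   where the specific register names are only an example for the first
   argument slot.  */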
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT bytes,
                         HOST_WIDE_INT words)
{
  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  /* OImode shouldn't be used directly.  */

  if (cum->float_in_sse < 2)
    break;
  if (cum->float_in_sse < 1)
    break;

  if (!type || !AGGREGATE_TYPE_P (type))
    {
      cum->sse_words += words;
      cum->sse_nregs -= 1;
      cum->sse_regno += 1;
      if (cum->sse_nregs <= 0)
        {
          cum->sse_nregs = 0;
          cum->sse_regno = 0;
        }
    }

  if (!type || !AGGREGATE_TYPE_P (type))
    {
      cum->mmx_words += words;
      cum->mmx_nregs -= 1;
      cum->mmx_regno += 1;
      if (cum->mmx_nregs <= 0)
        {
          cum->mmx_nregs = 0;
          cum->mmx_regno = 0;
        }
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
                 || VALID_AVX256_REG_MODE (mode)))
    return;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
                            HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
                           const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL, false);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type,
                 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (words <= cum->nregs)
    {
      int regno = cum->regno;

      /* Fastcall allocates the first two DWORD (SImode) or
         smaller arguments to ECX and EDX if it isn't an
         aggregate type.  */
      if (mode == DImode
          || (type && AGGREGATE_TYPE_P (type)))
        break;

      /* ECX not EAX is the first allocated register.  */
      if (regno == AX_REG)
        regno = CX_REG;

      return gen_rtx_REG (mode, regno);
    }

  if (cum->float_in_sse < 2)
    break;
  if (cum->float_in_sse < 1)
    break;

  /* In 32bit, we pass TImode in xmm registers.  */
  if (!type || !AGGREGATE_TYPE_P (type))
    return gen_reg_or_parallel (mode, orig_mode,
                                cum->sse_regno + FIRST_SSE_REG);

  /* OImode and XImode shouldn't be used directly.  */

  if (!type || !AGGREGATE_TYPE_P (type))
    return gen_reg_or_parallel (mode, orig_mode,
                                cum->sse_regno + FIRST_SSE_REG);

  if (!type || !AGGREGATE_TYPE_P (type))
    return gen_reg_or_parallel (mode, orig_mode,
                                cum->mmx_regno + FIRST_MMX_REG);
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
                    ? (cum->sse_nregs < 0
                       ? X86_64_SSE_REGPARM_MAX
                       : cum->sse_regno)
                    : -1);

  /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
                              cum->sse_nregs,
                              &x86_64_int_parameter_registers[cum->regno],
                              cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                    enum machine_mode orig_mode, bool named,
                    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
        regno = cum->regno + FIRST_SSE_REG;
      else
        {
          rtx t1, t2;

          /* Unnamed floating parameters are passed in both the
             SSE and integer registers.  */
          t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
          t2 = gen_rtx_REG (mode, regno);
          t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
          t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
        }
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
        mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
        mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
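/* Illustrative note (added): on the Windows x64 side an aggregate of at
   most 8 bytes arrives in a plain integer register, so a 3-byte BLKmode
   struct is retyped above as SImode and an 8-byte one as DImode before
   the register rtx is built.  */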
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
                   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum, false);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
                        const_tree type, bool)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
        {
          /* Arrays are passed by reference.  */
          if (TREE_CODE (type) == ARRAY_TYPE)
            return true;

          if (AGGREGATE_TYPE_P (type))
            {
              /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
                 are passed by reference.  */
              msize = int_size_in_bytes (type);
            }
        }

      /* __m128 is passed by reference.  */
      switch (msize)
        {
        case 1: case 2: case 4: case 8:
          break;
        default:
          return true;
        }
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;
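/* Illustrative note (added): under the rule above, a 16-byte vector such
   as __m128, or a 12-byte struct of three ints, is passed by reference on
   the Windows x64 ABI, while a struct of exactly 1, 2, 4 or 8 bytes
   travels by value in a register.  */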
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages pass arrays by value.  */
          if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;
        }
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
                                   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
         make an exception for SSE modes since these require 128bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
      if (!type)
        {
          if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
            align = PARM_BOUNDARY;
        }
      else
        {
          if (!ix86_compat_aligned_value_p (type))
            align = PARM_BOUNDARY;
        }
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type);
                 field;
                 field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages pass arrays by value.  */
          if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;
        }
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
         the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
        {
          /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
          if (!type)
            {
              if (mode == XFmode || mode == XCmode)
                align = PARM_BOUNDARY;
            }
          else if (!ix86_contains_aligned_value_p (type))
            align = PARM_BOUNDARY;

          if (align < 128)
            align = PARM_BOUNDARY;
        }

      if (warn_psabi
          && align != ix86_compat_function_arg_boundary (mode, type,
                                                         saved_align))
        inform (input_location,
                "The ABI for passing parameters with %d-byte"
                " alignment has changed in GCC 4.6",
                align / BITS_PER_UNIT);
    }

  return align;
}
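/* Illustrative note (added): by these rules, a 32-bit call such as
   f(__m128 v) places V at a 16-byte stack boundary, while f(long double x)
   places X at the plain 4-byte PARM_BOUNDARY required by the i386 ABI.  */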
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case DX_REG:
      return (!TARGET_64BIT || ix86_abi != MS_ABI);
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_abi != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
         builtins.c would need updating then.  Therefore we use the
         default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
        return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
        return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
           || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
        case SDmode:
        case DDmode:
        case TDmode:
          regno = FIRST_SSE_REG;
          break;
        case XFmode:
        case XCmode:
          regno = FIRST_FLOAT_REG;
          break;
        default:
          regno = AX_REG;
        }

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
                             X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                             x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
                      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 16:
          if (valtype != NULL_TREE
              && !VECTOR_INTEGER_TYPE_P (valtype)
              && !INTEGRAL_TYPE_P (valtype)
              && !VECTOR_FLOAT_TYPE_P (valtype))
            break;
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
        case 8:
        case 4:
          if (mode == SFmode || mode == DFmode)
            regno = FIRST_SSE_REG;
          break;
        }
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
                       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
                            int *punsignedp, const_tree fntype,
                            int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
                                        for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
          && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
              || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
        {
          size = int_size_in_bytes (type);

          /* __m128 is returned in xmm0.  */
          if ((!type || VECTOR_INTEGER_TYPE_P (type)
               || INTEGRAL_TYPE_P (type)
               || VECTOR_FLOAT_TYPE_P (type))
              && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode)
              && (GET_MODE_SIZE (mode) == 16 || size == 16))
            return false;

          /* Otherwise, the size must be exactly in [1248].  */
          return size != 1 && size != 2 && size != 4 && size != 8;
        }
      else
        {
          int needed_intregs, needed_sseregs;

          return examine_argument (mode, type, 1,
                                   &needed_intregs, &needed_sseregs);
        }
    }
  else
    {
      if (mode == BLKmode)
        return true;

      size = int_size_in_bytes (type);

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
        return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
        {
          /* User-created vectors small enough to fit in EAX.  */
          if (size < 8)
            return false;

          /* Unless ABI prescribes otherwise,
             MMX/3dNow values are returned in MM0 if available.  */
          if (size == 8)
            return TARGET_VECT8_RETURNS || !TARGET_MMX;

          /* SSE values are returned in XMM0 if available.  */
          if (size == 16)
            return !TARGET_SSE;

          /* AVX values are returned in YMM0 if available.  */
          if (size == 32)
            return !TARGET_AVX;

          /* AVX512F values are returned in ZMM0 if available.  */
          if (size == 64)
            return !TARGET_AVX512F;
        }

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
#endif
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
                          TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
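/* Reference sketch (added): the record built above corresponds to the
   user-visible SysV x86-64 va_list,

       typedef struct {
         unsigned int gp_offset;      // bytes into reg_save_area for GPRs
         unsigned int fp_offset;      // bytes into reg_save_area for XMMs
         void *overflow_arg_area;     // next stack-passed argument
         void *reg_save_area;         // spilled register arguments
       } __va_list_tag[1];
*/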
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
        {
          t = ix86_build_builtin_va_list_abi (SYSV_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
        }
      else
        {
          t = ret;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
        }
      if (ix86_abi != MS_ABI)
        {
          t = ix86_build_builtin_va_list_abi (MS_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
        }
      else
        {
          t = ret;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
        }
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
                         plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
                      gen_rtx_REG (word_mode,
                                   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function, though all we
         actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
                                      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
                         plus_constant (Pmode, virtual_incoming_args_rtx,
                                        i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
                             tree type, int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
                               true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg, seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = get_insns ();
          end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL, false);

  /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
  if (!TARGET_64BIT_MS_ABI)
    container = NULL;

  container = construct_container (nat_mode, TYPE_MODE (type),
                                   type, 0, X86_64_REGPARM_MAX,
                                   X86_64_SSE_REGPARM_MAX, intreg,
                                   0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type, addr_type, daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
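/* Illustrative sketch (added): the expansion above mirrors the va_arg
   algorithm in the SysV x86-64 ABI.  For an int it is roughly

       if (ap->gp_offset <= 40)       // a GPR save slot is still left
         {
           p = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else
         {
           p = ap->overflow_arg_area;
           ap->overflow_arg_area += 8;
         }
*/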
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char *cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
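/* Reference note (added): the five strings above are log10(2), ln(2),
   log2(e), log2(10) and pi, i.e. exactly the values pushed by the x87
   fldlg2, fldln2, fldl2e, fldl2t and fldpi instructions.  */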
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  i = idx - 3;
  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    return 2;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_XI:
          return "vpxord\t%g0, %g0, %g0";
        case MODE_V16SF:
          return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
                                 : "vpxord\t%g0, %g0, %g0";
        case MODE_V8DF:
          return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
                                 : "vpxorq\t%g0, %g0, %g0";
        case MODE_TI:
          return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
                                 : "%vpxor\t%0, %d0";
        case MODE_V2DF:
          return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
                                 : "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";
        }

    case 2:
      if (TARGET_AVX512VL
          || get_attr_mode (insn) == MODE_XI
          || get_attr_mode (insn) == MODE_V8DF
          || get_attr_mode (insn) == MODE_V16SF)
        return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";
    }
}
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      /* We don't need a pic base, we're not producing pic.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
         This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
         and we decided to emit the pic base above, we will still output a
         local label used for calculating the correction offset (even though
         the offset will be 0 in that case).  */
      if (label)
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
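/* Illustrative note (added): gen_push builds RTL of the shape

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))

   (word_mode shown as DI for a 64-bit target), i.e. a single pushq,
   while gen_pop builds the matching post_inc load for popq.  */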
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool
          || cfun->has_nonlocal_label))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));

  return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
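
/* For example, when the frame is realigned off the frame pointer
   (stack_realign_fp), eliminating ARG_POINTER_REGNUM directly to
   STACK_POINTER_REGNUM is rejected: the gap between %esp and the
   argument area has unknown size, so the argument pointer may only be
   rewritten in terms of the hard frame pointer.  */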
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
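
/* A worked example with illustrative numbers: if
   frame.stack_pointer_offset is 48 and frame.frame_pointer_offset is 16,
   eliminating FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields
   48 - 16 = 32, the distance between the two pointers at entry.  */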
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }
  /* preferred_stack_boundary is never updated for call
     expanded from tls descriptor.  Update it here.  We don't update it in
     expand stage because according to the comments before
     ix86_current_function_calls_tls_descriptor, tls calls may be optimized
     away.  */
  else if (ix86_current_function_calls_tls_descriptor
           && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
    {
      crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
      if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
        crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
         the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
          && (diff > 240 || (diff & 15) != 0)
          && !crtl->accesses_prior_frames)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset
            = frame->stack_pointer_offset - 128;
        }
    }
}
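
/* The alignment idiom used repeatedly above rounds OFFSET up to a
   power-of-two boundary; e.g. with offset == 40 and a 16-byte alignment,
   (40 + 16 - 1) & -16 == 48.  An illustrative self-contained form:

       static HOST_WIDE_INT
       align_up (HOST_WIDE_INT offset, HOST_WIDE_INT align)
       {
         return (offset + align - 1) & -align;   / * ALIGN must be 2^k * /
       }
*/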
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static unsigned int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  unsigned int len;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;
  else
    len = 4;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
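
/* Examples of the lengths computed above: (%ebp) needs a zero disp8, so
   len == 1; (%esp) needs only a SIB byte, so len == 1 as well; an offset
   of 300 from %ebx needs a disp32, so len == 4.  */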
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      unsigned int len = 16, tlen = 16;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
                            m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}

static GTY(()) rtx queued_cfa_restores;
/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;

  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
             taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
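
/* A typical prologue use, sketched for illustration: allocating 16 bytes
   of frame would be

       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                  GEN_INT (-16), -1,
                                  m->fs.cfa_reg == stack_pointer_rtx);

   which emits a "sub $16, %sp" marked frame-related and moves the
   tracked sp_offset down by 16.  */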
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
        1. a parameter passing register.
        2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   longer encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
         chain.  Since a function with a tail call may use any
         caller-saved registers in the epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
         chain.  Since a function with a tail call may use any
         caller-saved registers in the epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
        = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
         for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
         for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
               && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
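
/* Note: with the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 from
   defaults.h, PROBE_INTERVAL is 4096 bytes, i.e. roughly one probe per
   page on most configurations.  */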
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

         while (SP != LAST_ADDR)
           {
             SP = SP + PROBE_INTERVAL
             probe at SP
           }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
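
/* Illustration of the Step 1 rounding above: with PROBE_INTERVAL == 4096
   and size == 10000, rounded_size == (10000 & -4096) == 8192, so the
   loop covers the first 8192 bytes and Step 4 emits one extra probe for
   the remaining 10000 - 8192 = 1808 bytes.  */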
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
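
/* For illustration, with PROBE_INTERVAL == 4096 the AT&T-syntax loop
   emitted above looks roughly like this (register choice and label names
   are made up):

       LPSRL0:  cmpl    %eax, %ecx
                je      LPSRE0
                subl    $4096, %ecx
                orl     $0, (%esp,%ecx)
                jmp     LPSRL0
       LPSRE0:
*/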
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (crtl->is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB_FN (bb, cfun)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      /* If drap has been set, but it actually isn't live at the start
         of the function, there is no reason to set it up.  */
      if (crtl->drap_reg)
        {
          basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
          if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
            {
              crtl->drap_reg = NULL_RTX;
              crtl->need_drap = false;
            }
        }
      else
        cfun->machine->no_drap_save_restore = true;

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

static void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
         prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn%'t compatible "
               "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi,%edi
         55        push   %ebp
         8b ec     movl.s %esp,%ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
        {
          rtx push, mov;

          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                             stack_pointer_rtx);
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;
        }
      else
        {
          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));
        }
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        {
          /* Push arg pointer reg */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
         address can be reached via (argp - 1) slot.  This is needed
         to implement macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
         on SEH target.  */
      if (!int_registers_saved
          && TARGET_SEH
          && !frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
        {
          insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
          RTX_FRAME_RELATED_P (insn) = 1;

          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.sp_offset;
          m->fs.fp_valid = true;
        }
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      /* When using red zone we may start register saving before allocating
         the stack frame saving one cycle of the prologue.  However, avoid
         doing this if we have to probe the stack; at least on x86_64 the
         stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
        {
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;
        }
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
        {
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;
        }

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
        frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
         written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-sse_size), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
        {
          if (!(crtl->is_leaf && !cfun->calls_alloca
                && allocate <= PROBE_INTERVAL))
            {
              ix86_adjust_stack_and_probe (allocate);
              allocate = 0;
            }
        }
      else
        {
          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            {
              if (crtl->is_leaf && !cfun->calls_alloca)
                {
                  if (size > PROBE_INTERVAL)
                    ix86_emit_probe_stack_range (0, size);
                }
              else
                ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
            }
          else
            {
              if (crtl->is_leaf && !cfun->calls_alloca)
                {
                  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
                    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
                                                 size - STACK_CHECK_PROTECT);
                }
              else
                ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
            }
        }
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
        {
          insn = emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
          /* Note that SEH directives need to continue tracking the stack
             pointer even after the frame pointer has been set up.  */
          if (sp_is_cfa_reg || TARGET_SEH)
            {
              if (sp_is_cfa_reg)
                m->fs.cfa_offset += UNITS_PER_WORD;
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }

      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          insn = emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
          if (sp_is_cfa_reg || TARGET_SEH)
            {
              if (sp_is_cfa_reg)
                m->fs.cfa_offset += UNITS_PER_WORD;
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
        {
          if (sp_is_cfa_reg)
            m->fs.cfa_offset += allocate;
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code
         works for realigned stack, too.  */
      if (r10_live && eax_live)
        {
          t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
          emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                          gen_frame_mem (word_mode, t));
          t = plus_constant (Pmode, t, UNITS_PER_WORD);
          emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
                          gen_frame_mem (word_mode, t));
        }
      else if (eax_live || r10_live)
        {
          t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
          emit_move_insn (gen_rtx_REG (word_mode,
                                       (eax_live ? AX_REG : R10_REG)),
                          gen_frame_mem (word_mode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  /* We don't use pic-register for pe-coff target.  */
  if (pic_offset_table_rtx
      && !TARGET_PECOFF
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx_code_label *label;
              rtx tmp_reg;

              gcc_assert (Pmode == DImode);
              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              tmp_reg = gen_rtx_REG (Pmode, R11_REG);
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
                                               pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
         isn't necessary; here we will emit a prologue to set up DRAP
         without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
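
/* For a simple 32-bit function that needs a frame pointer and N bytes of
   local storage, the insns emitted above correspond to the classic
   sequence (an illustrative sketch; details vary with the options in
   effect):

       pushl   %ebp
       movl    %esp, %ebp
       subl    $N, %esp
*/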
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
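/* Illustrative note (a sketch, not generated by this file): for a frame
   that only saved %ebp, the pop handled above corresponds to unwind info
   roughly like

        popl    %ebp
        .cfi_def_cfa    %esp, 4

   where the one-word offset accounts for the return address that is
   still on the stack.  */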
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                             m->fs.fp_offset);
}
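/* For reference, LEAVE is equivalent to

        movl    %ebp, %esp
        popl    %ebp

   which is why sp_offset above becomes fp_offset minus one word and the
   frame pointer ceases to be valid afterwards.  */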
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx mem, insn;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
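/* Note: V4SFmode here is simply a convenient 16-byte vector mode for the
   save slots; the set_mem_align call above asserts the 128-bit alignment
   of those slots to the optimizers, it does not enforce it.  */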
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (crtl->sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;
  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack align doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither does regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (Pmode, sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  Also do it on SEH target for very large
         frame as the emitted instructions aren't allowed by the ABI
         in epilogues.  */
      if (!m->fs.sp_valid
          || (TARGET_SEH
              && (m->fs.sp_offset - frame.reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.   */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());
  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
         address, do explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
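/* Illustrative note: for a 32-bit callee-pops function popping more than
   64K of arguments, the code above emits the equivalent of

        popl    %ecx            # return address
        addl    $N, %esp        # N = crtl->args.pops_args
        jmp     *%ecx

   instead of the usual "ret $N", whose immediate is limited to 16 bits.  */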
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx_insn *insn = get_last_insn ();
    rtx_insn *deleted_debug_label = NULL;
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
        /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
           notes only; instead set their CODE_LABEL_NUMBER to -1,
           otherwise there would be code generation differences
           between -g and -g0.  */
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          deleted_debug_label = insn;
        insn = PREV_INSN (insn);
      }
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
        if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (is_thiscall)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return DX_REG;
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
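/* To summarize the choices above for 32-bit: %ecx is preferred (it is
   caller-saved and not an ordinary parameter register), falling back to
   %edx or %eax when the static chain or the fastcall/thiscall
   conventions claim it.  In 64-bit mode %r11 is always available.  */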
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                          UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
                                    stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_int_reg_note (jump_insn, REG_BR_PROB,
                    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
          && !TARGET_PECOFF)
        {
          HOST_WIDE_INT argval;

          gcc_assert (Pmode == DImode);
          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            split_stack_fn_large =
              gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx_code_label *label;
              rtx x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
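/* A rough sketch (illustrative only; the exact code depends on the
   target, TCB layout, and frame size) of what the above emits on
   32-bit at -fsplit-stack:

        cmpl    %gs:OFFSET, %esp        # stack limit from the TCB
        jae     1f                      # enough stack: skip the call
        pushl   $args_size
        pushl   $frame_size
        calll   __morestack
        retl                            # paired with the call; see morestack.S
   1:                                   # regular prologue follows

   The unusual ret exists only for return-prediction; control actually
   continues at the label.  */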
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        {
          addr = XEXP (addr, 0);
          if (CONST_INT_P (addr))
            return 0;
        }
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
          if (addr == NULL_RTX)
            return 0;

          if (CONST_INT_P (addr))
            return 0;
        }
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
          && GET_MODE (SUBREG_REG (addr)) == DImode)
        {
          addr = SUBREG_REG (addr);
          if (CONST_INT_P (addr))
            return 0;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (REG_P (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case ZERO_EXTEND:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return 0;
              /* FALLTHRU */

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = DEFAULT_TLS_SEG_REG;
              else
                return 0;
              break;

            case SUBREG:
              if (!REG_P (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  if (index)
    {
      if (REG_P (index))
        ;
      else if (GET_CODE (index) == SUBREG
               && REG_P (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
          || base_reg == frame_pointer_rtx
          || base_reg == arg_pointer_rtx
          || (REG_P (base_reg)
              && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
                  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
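/* Some example decompositions handled above (illustrative only):

     (reg)                              -> base
     (plus (reg) (const_int 4))         -> base + disp
     (plus (reg) (mult (reg) (const_int 4)))
                                        -> base + index*4
     (ashift (reg) (const_int 3))       -> index*8, returning -1 (lea only)

   The special cases then canonicalize the encoding: %ebp or %r13 as a
   base gets a zero displacement, and index*2 with no base becomes
   reg+reg.  */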
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions,
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
        return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.
             A dllimported symbol always needs to be resolved.  */
          if (SYMBOL_REF_TLS_MODEL (op0)
              || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
                  && SYMBOL_REF_DLLIMPORT_P (op0)))
            return false;

          if (TARGET_PECOFF)
            {
              if (is_imported_p (op0))
                return true;

              if (SYMBOL_REF_FAR_ADDR_P (op0)
                  || !SYMBOL_REF_LOCAL_P (op0))
                break;

              /* Function-symbols need to be resolved only for
                 large-model.
                 For the small-model we don't need to resolve anything
                 here.  */
              if ((ix86_cmodel != CM_LARGE_PIC
                   && SYMBOL_REF_FUNCTION_P (op0))
                  || ix86_cmodel == CM_SMALL_PIC)
                return true;
              /* Non-external symbols don't need to be resolved for
                 large, and medium-model.  */
              if ((ix86_cmodel == CM_LARGE_PIC
                   || ix86_cmodel == CM_MEDIUM_PIC)
                  && !SYMBOL_REF_EXTERNAL_P (op0))
                return true;
            }
          else if (!SYMBOL_REF_FAR_ADDR_P (op0)
                   && SYMBOL_REF_LOCAL_P (op0)
                   && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
         distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI specifies a 32bit relocation too, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
                                int)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
                       (reg:DI 97))
              (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
        {
          push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
                       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
        {
          push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
                       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                       opnum, (enum reload_type) type);
          something_reloaded = true;
        }

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Determine if op is suitable RTX for an address register.
   Return naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */

static rtx
ix86_validate_address_register (rtx op)
{
  enum machine_mode mode = GET_MODE (op);

  /* Only SImode or DImode registers can form the address.  */
  if (mode != SImode && mode != DImode)
    return NULL_RTX;

  if (REG_P (op))
    return op;
  else if (GET_CODE (op) == SUBREG)
    {
      rtx reg = SUBREG_REG (op);

      if (!REG_P (reg))
        return NULL_RTX;

      mode = GET_MODE (reg);

      /* Don't allow SUBREGs that span more than a word.  It can
         lead to spill failures when the register is one word out
         of a two word structure.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return NULL_RTX;

      /* Allow only SUBREGs of non-eliminable hard registers.  */
      if (register_no_elim_operand (reg, mode))
        return reg;
    }

  /* Op is not a register.  */
  return NULL_RTX;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg seg;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
          || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
             used.  While the ABI specifies 32bit relocations too, we don't
             produce them at all and use IP relative instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
                       )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* Displacement must be referenced via non_lazy_pointer.  */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
         we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
               && CONST_INT_P (disp)
               && val_signbit_known_set_p (SImode, INTVAL (disp)))
        return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT && !TARGET_PECOFF
           && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
          new_rtx = reg;
        }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;

      /* For x64 PE-COFF there is no GOT table.  So we use address
         directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              rtx base = legitimize_pic_address (op0, reg);
              enum machine_mode mode = GET_MODE (base);
              new_rtx
                = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                {
                  if (INTVAL (new_rtx) < -16*1024*1024
                      || INTVAL (new_rtx) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (new_rtx, mode))
                        new_rtx = force_reg (mode, new_rtx);
                      new_rtx
                        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
                    }
                  else
                    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
                }
              else
                {
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
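/* For reference, the two 32-bit PIC flavors produced above correspond
   to assembly along the lines of

        leal    foo@GOTOFF(%ebx), %eax  # local/static data
        movl    foo@GOT(%ebx), %eax     # global data, loaded from the GOT

   with %ebx standing in for pic_offset_table_rtx.  */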
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
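/* The UNSPEC_TP above typically materializes as a segment-register
   access, e.g. %gs:0 on 32-bit GNU/Linux and %fs:0 on 64-bit, from
   which TLS offsets are then computed.  */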
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
                                   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                           gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns, eqv;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	type = UNSPEC_INDNTPOFF;

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
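/* A rough sketch of the IA-32 GNU/Linux code each TLS model produces
   (illustrative only; this function emits RTL, not these exact strings):

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr@PLT
     local dynamic:   leal x@tlsldm(%ebx), %eax; call ___tls_get_addr@PLT
		      ... leal x@dtpoff(%eax), %edx
     initial exec:    movl x@gotntpoff(%ebx), %ecx; movl %gs:(%ecx), %eax
     local exec:      movl %gs:x@ntpoff, %eax

   The actual sequences also depend on TARGET_GNU2_TLS, 64-bit mode and
   whether -fpic is in effect.  */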
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
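/* Example (illustrative, not from the original source): for a dllimport'ed
   "int foo" on 32-bit Windows targets, the decl built above wraps a
   reference named

	__imp__foo	(or __imp_foo without a user label prefix)

   and uses of foo then load through that pointer.  The refptr-DECL variant
   plays the same role for far-addressed externals under the medium and
   large code models.  */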
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
					       inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;

  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0),
					      inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }

  return NULL_RTX;
}
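/* Usage sketch (illustrative): callers probe this routine and fall back to
   the generic paths when it declines:

     rtx tmp = legitimize_pe_coff_symbol (x, true);
     if (tmp)
       return tmp;   /+ x was a dllimport/refptr symbol +/
     /+ otherwise continue with the ordinary legitimization below +/

   ix86_legitimize_address below follows exactly this pattern.  */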
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
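/* Example of the canonicalizations above (illustrative): an address
   computed as (plus (ashift r1 2) r2) is first rewritten into
   (plus (mult r1 4) r2), which matches the hardware's base + index*scale
   addressing form and can assemble to a single

	leal	(%edx,%eax,4), %ecx

   instead of a separate shift and add.  */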
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
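/* For reference (illustrative): the UNSPEC suffixes printed above
   correspond to assembler relocation operators such as

	movl	foo@GOT(%ebx), %eax		# UNSPEC_GOT
	leal	foo@GOTOFF(%ebx), %eax		# UNSPEC_GOTOFF
	movq	foo@GOTPCREL(%rip), %rax	# UNSPEC_GOTPCREL

   which the linker resolves to GOT slots or direct offsets.  */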
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = simplify_gen_subreg (GET_MODE (orig_x), x,
				       GET_MODE (x), 0);
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	   leal (%ebx, %ecx, 4), %ecx
	   ...
	   movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode,
			       gen_rtx_MINUS (Pmode, copy_rtx (addend),
					      pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
/* Print to FILE the assembler condition-code suffix for CODE in MODE,
   optionally REVERSEd.  FP selects the fcmov-style spellings.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;

    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	gcc_unreachable ();
      break;

    case LTU:
      if (mode == CCmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = "c";
      else
	gcc_unreachable ();
      break;

    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	gcc_unreachable ();
      break;

    case GEU:
      if (mode == CCmode)
	suffix = fp ? "nb" : "ae";
      else if (mode == CCCmode)
	suffix = "nc";
      else
	gcc_unreachable ();
      break;

    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;

    case LEU:
      if (mode == CCmode)
	suffix = fp ? "na" : "be";
      else
	gcc_unreachable ();
      break;

    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;

    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;

    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  unsigned int regno;
  bool duplicated = code == 'd' && TARGET_AVX;

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  regno = true_regnum (x);
  gcc_assert (regno != ARG_POINTER_REGNUM
	      && regno != FRAME_POINTER_REGNUM
	      && regno != FLAGS_REG
	      && regno != FPSR_REG
	      && regno != FPCR_REG);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else if (code == 'g')
    code = 64;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[regno] + 1, file);
	  return;
	}
      break;
    case 64:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('z', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
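/* Example (illustrative): for (reg:SI ax), code 0 prints "%eax"; code 'w'
   prints "%ax", 'b' prints "%al", 'h' prints "%ah" and 'q' prints "%rax"
   (64-bit only).  In Intel syntax the "%" prefix is omitted.  */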
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print embedded rounding and sae.
   r -- print only sae.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */

static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (x);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT != ASM_ATT)
	    return;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
	      putc ('w', file);
	      break;

	    case 4:
	      putc ('l', file);
	      break;

	    case 8:
	      putc ('q', file);
	      break;

	    default:
	      output_operand_lossage
		("invalid operand size for operand code 'O'");
	      return;
	    }

	  putc ('.', file);
#endif
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;

		case 2:
		  putc ('w', file);
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
		  putc ('q', file);
		  return;

		default:
		  output_operand_lossage
		    ("invalid operand size for operand code 'z'");
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code 'z'");
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;

		case 8:
		  putc ('l', file);
		  return;

		case 12:
		case 16:
		  putc ('t', file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code 'Z'");
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code 'Z'");
	  return;

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'g':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	case 'd':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the fp
	     conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case UNEQ:
	      if (TARGET_AVX)
		{
		  fputs ("eq_us", file);
		  break;
		}
	      /* FALLTHRU */
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case UNLT:
	      if (TARGET_AVX)
		{
		  fputs ("nge", file);
		  break;
		}
	      /* FALLTHRU */
	    case LT:
	      fputs ("lt", file);
	      break;
	    case UNLE:
	      if (TARGET_AVX)
		{
		  fputs ("ngt", file);
		  break;
		}
	      /* FALLTHRU */
	    case LE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case LTGT:
	      if (TARGET_AVX)
		{
		  fputs ("neq_oq", file);
		  break;
		}
	      /* FALLTHRU */
	    case NE:
	      fputs ("neq", file);
	      break;
	    case GE:
	      if (TARGET_AVX)
		{
		  fputs ("ge", file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case GT:
	      if (TARGET_AVX)
		{
		  fputs ("gt", file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'D'");
	      return;
	    }
	  return;

	case 'F':
	case 'f':
	case 'C':
	case 'c':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',
			      file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  /* Output 'qword ptr' for intel assembler dialect.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    code = 'q';
	  break;

	case 'K':
	  gcc_assert (CONST_INT_P (x));

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
	  /* We do not want to print value of the operand.  */
	  return;

	case 'N':
	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
	    fputs ("{z}", file);
	  break;

	case 'r':
	  gcc_assert (CONST_INT_P (x));
	  gcc_assert (INTVAL (x) == ROUND_SAE);

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);

	  fputs ("{sae}", file);

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  break;

	case 'R':
	  gcc_assert (CONST_INT_P (x));

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);

	  switch (INTVAL (x))
	    {
	    case ROUND_NEAREST_INT | ROUND_SAE:
	      fputs ("{rn-sae}", file);
	      break;
	    case ROUND_NEG_INF | ROUND_SAE:
	      fputs ("{rd-sae}", file);
	      break;
	    case ROUND_POS_INF | ROUND_SAE:
	      fputs ("{ru-sae}", file);
	      break;
	    case ROUND_ZERO | ROUND_SAE:
	      fputs ("{rz-sae}", file);
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  break;

	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = XINT (x, 0);

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	{
	  const char *size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	    case 16:
	      if (GET_MODE (x) == XFmode)
		size = "TBYTE";
	      else
		size = "XMMWORD";
	      break;
	    case 32: size = "YMMWORD"; break;
	    case 64: size = "ZMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w', 'k',
	     'q' and 'x')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
		 (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr),
		       0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  fputs ("$0", file);
	}
      else
	{
	  if (code != 'P' && code != 'p')
	    {
	      if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
		{
		  if (ASSEMBLER_DIALECT == ASM_ATT)
		    putc ('$', file);
		}
	      else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		       || GET_CODE (x) == LABEL_REF)
		{
		  if (ASSEMBLER_DIALECT == ASM_ATT)
		    putc ('$', file);
		  else
		    fputs ("OFFSET FLAT:", file);
		}
	    }
	  if (CONST_INT_P (x))
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  else if (flag_pic || MACHOPIC_INDIRECT)
	    output_pic_addr_const (file, x, code);
	  else
	    output_addr_const (file, x);
	}
    }
}
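/* Example (illustrative): an insn template such as

     "add%z0\t{%2, %0|%0, %2}"

   uses %z0 to pick the size suffix from operand 0's mode ("l" for SImode),
   printing "addl %eax, %edx" in AT&T syntax and "add edx, eax" in Intel
   syntax.  The braces select text per assembler dialect, as above.  */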
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
#ifdef ENABLE_CHECKING
	  gcc_assert (TARGET_64BIT);
	  switch (GET_CODE (addr))
	    {
	    case SUBREG:
	      gcc_assert (GET_MODE (addr) == SImode);
	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
	      break;
	    case ZERO_EXTEND:
	    case AND:
	      gcc_assert (GET_MODE (addr) == DImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
#endif
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
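/* Example (illustrative): the address (plus (mult (reg:SI bx) (const_int 4))
   (plus (reg:SI si) (const_int 12))) prints as

	12(%esi,%ebx,4)		# AT&T branch above
	[esi+ebx*4+12]		# Intel branch above

   matching the two dialect paths in this function.  */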
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
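/* Usage sketch (illustrative): splitting a DImode operand on a 32-bit
   target yields its two SImode halves, e.g.

     rtx lo, hi;
     split_double_mode (DImode, &op, 1, &lo, &hi);

   turns a DImode register into its low and high SImode subregs, and an
   offsettable MEM into MEMs at byte offsets 0 and 4.  */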
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
	       || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
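/* Worked example (illustrative): when operands[2] dies and the result goes
   to st(1), the dialect template "{p\t%0, %2|rp\t%2, %0}" appended to
   "fsub" emits "fsubp" with one operand order for AT&T-style assemblers
   and "fsubrp" with the other for Intel-style ones; both spell the same
   hardware operation, which is precisely the fsub{r}/fdiv{r} lossage the
   SYSV386_COMPAT comment above describes.  */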
/* Check if a 256bit AVX register is referenced inside of EXP.   */

static int
ix86_check_avx256_register (rtx *pexp, void *)
{
  rtx exp = *pexp;

  if (GET_CODE (exp) == SUBREG)
    exp = SUBREG_REG (exp);

  if (REG_P (exp)
      && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
    return 1;

  return 0;
}

/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx256_register (&arg, NULL))
		return AVX_U128_DIRTY;
	    }
	}

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
     changes state only when a 256bit register is written to, but we need
     to prevent the compiler from moving optimal insertion point above
     eventual read from 256bit register.  */
  if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx_insn *insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Return mode that entity must be switched into
   prior to the execution of insn.  */

static int
ix86_mode_needed (int entity, rtx_insn *insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}
16188 /* Check if a 256bit AVX register is referenced in stores. */
16191 ix86_check_avx256_stores (rtx dest
, const_rtx
, void *data
)
16193 if (ix86_check_avx256_register (&dest
, NULL
))
16195 bool *used
= (bool *) data
;
/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_reg256_found = false;
      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);

      return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit registers, the mode was already changed
     to DIRTY from MODE_NEEDED.  */
  return mode;
}
/* Return the mode that an insn results in.  */

static int
ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return mode;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx256_register (&incoming, NULL))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are
     256bit modes used in the function return register.  */
  if (reg && ix86_check_avx256_register (&reg, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_mode_priority (int, int n)
{
  return n;
}

/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */
static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_insn_for_size_p ())
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
/* Emit vzeroupper.  */

static void
ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  int i;

  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */

  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
      return;

  if (TARGET_64BIT)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
	return;

  emit_insn (gen_avx_vzeroupper ());
}
/* Generate one or more insns to set ENTITY to MODE.  HARD_REG_LIVE
   is the set of hard registers live at the point where the insn(s)
   are to be inserted.  */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live)
{
  switch (entity)
    {
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	ix86_avx_emit_vzeroupper (regs_live);
      break;
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
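
/* Illustrative sketch (not part of GCC): a C cast from floating point to
   integer must truncate, while the x87 default rounding mode is
   round-to-nearest; that is why the code above brackets fist/fistp with
   fldcw unless the SSE3 fisttp instruction is available.  A portable
   analogue of the control-word dance:  */
#if 0
#include <fenv.h>
#include <math.h>

static long
trunc_convert (double x)
{
  int old = fegetround ();
  fesetround (FE_TOWARDZERO);	/* the "fldcw\t%3" above */
  long r = lrint (x);
  fesetround (old);		/* the "fldcw\t%2" above */
  return r;
}
#endif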
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
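
/* For reference (not part of GCC): "xor %eax, %eax" encodes in 2 bytes
   (31 c0) while "mov $0x0, %eax" takes 5 bytes (b8 00 00 00 00), which is
   why the xor form wins when optimizing for size; but the xor form writes
   the flags, hence the CLOBBER of FLAGS_REG added above when that variant
   is chosen.  */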
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      rtx tmp;

      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  op1 = convert_to_mode (mode, op1, 1);
	}
      else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
	op1 = tmp;
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else
	tmp = legitimize_pe_coff_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  if (push_operand (op0, VOIDmode))
    op0 = emit_move_resolve_push (mode, op0);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      load_unaligned = gen_avx_loaddquv32qi;
      store_unaligned = gen_avx_storedquv32qi;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      load_unaligned = gen_avx_loadups256;
      store_unaligned = gen_avx_storeups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      load_unaligned = gen_avx_loadupd256;
      store_unaligned = gen_avx_storeupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
	{
	  rtx r = gen_reg_rtx (mode);
	  m = adjust_address (op1, mode, 0);
	  emit_move_insn (r, m);
	  m = adjust_address (op1, mode, 16);
	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
	  emit_move_insn (op0, r);
	}
      /* Normal *mov<mode>_internal pattern will handle
	 unaligned loads just fine if misaligned_operand
	 is true, and without the UNSPEC it can be combined
	 with arithmetic instructions.  */
      else if (misaligned_operand (op1, GET_MODE (op1)))
	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
      else
	emit_insn (load_unaligned (op0, op1));
    }
  else if (MEM_P (op0))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
	{
	  m = adjust_address (op0, mode, 0);
	  emit_insn (extract (m, op1, const0_rtx));
	  m = adjust_address (op0, mode, 16);
	  emit_insn (extract (m, op1, const1_rtx));
	}
      else
	emit_insn (store_unaligned (op0, op1));
    }
  else
    gcc_unreachable ();
}
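
/* Illustrative sketch (not part of GCC): at the source level, the
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD strategy above corresponds to
   replacing one 32-byte unaligned load with two 16-byte loads plus an
   insert:  */
#if 0
#include <immintrin.h>

static __m256
load_32byte_split (const float *p)
{
  __m128 lo = _mm_loadu_ps (p);		/* adjust_address (op1, mode, 0)  */
  __m128 hi = _mm_loadu_ps (p + 4);	/* adjust_address (op1, mode, 16) */
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);
}
#endif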
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
	 xorps  reg, reg
	 movlps mem, reg
	 movhps mem+8, reg
       }
     else
       {
	 movlps mem, reg
	 movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
	 movlpd mem, reg
	 movhpd mem+8, reg
       }
     else
       {
	 movsd  mem, reg
	 movhpd mem+8, reg
       }
 */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, orig_op0 = NULL_RTX, m;
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);

  op0 = operands[0];
  op1 = operands[1];

  if (GET_MODE_SIZE (mode) == 64)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  if (GET_MODE (op0) != V16SImode)
	    {
	      if (!MEM_P (op0))
		{
		  orig_op0 = op0;
		  op0 = gen_reg_rtx (V16SImode);
		}
	      else
		op0 = gen_lowpart (V16SImode, op0);
	    }
	  op1 = gen_lowpart (V16SImode, op1);
	  /* FALLTHRU */

	case MODE_VECTOR_FLOAT:
	  switch (GET_MODE (op0))
	    {
	    default:
	      gcc_unreachable ();
	    case V16SImode:
	      load_unaligned = gen_avx512f_loaddquv16si;
	      store_unaligned = gen_avx512f_storedquv16si;
	      break;
	    case V16SFmode:
	      load_unaligned = gen_avx512f_loadups512;
	      store_unaligned = gen_avx512f_storeups512;
	      break;
	    case V8DFmode:
	      load_unaligned = gen_avx512f_loadupd512;
	      store_unaligned = gen_avx512f_storeupd512;
	      break;
	    }

	  if (MEM_P (op1))
	    emit_insn (load_unaligned (op0, op1));
	  else if (MEM_P (op0))
	    emit_insn (store_unaligned (op0, op1));
	  else
	    gcc_unreachable ();
	  if (orig_op0)
	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (TARGET_AVX
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  if (GET_MODE (op0) != V32QImode)
	    {
	      if (!MEM_P (op0))
		{
		  orig_op0 = op0;
		  op0 = gen_reg_rtx (V32QImode);
		}
	      else
		op0 = gen_lowpart (V32QImode, op0);
	    }
	  op1 = gen_lowpart (V32QImode, op1);
	  /* FALLTHRU */

	case MODE_VECTOR_FLOAT:
	  ix86_avx256_split_vector_move_misalign (op0, op1);
	  if (orig_op0)
	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* Normal *mov<mode>_internal pattern will handle
	 unaligned loads just fine if misaligned_operand
	 is true, and without the UNSPEC it can be combined
	 with arithmetic instructions.  */
      if (TARGET_AVX
	  && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
	  && misaligned_operand (op1, GET_MODE (op1)))
	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (GET_MODE (op0) != V16QImode)
	    {
	      orig_op0 = op0;
	      op0 = gen_reg_rtx (V16QImode);
	    }
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_loaddquv16qi (op0, op1));
	  if (orig_op0)
	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_insn_for_size_p ())
	    {
	      /* We will eventually emit movups based on insn attributes.  */
	      emit_insn (gen_sse2_loadupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  rtx t;

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_insn_for_size_p ())
	    {
	      if (GET_MODE (op0) != V4SFmode)
		{
		  orig_op0 = op0;
		  op0 = gen_reg_rtx (V4SFmode);
		}
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_loadups (op0, op1));
	      if (orig_op0)
		emit_move_insn (orig_op0,
				gen_lowpart (GET_MODE (orig_op0), op0));
	      return;
	    }

	  if (mode != V4SFmode)
	    t = gen_reg_rtx (V4SFmode);
	  else
	    t = op0;

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (t, CONST0_RTX (V4SFmode));
	  else
	    emit_clobber (t);

	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (t, t, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (t, t, m));
	  if (mode != V4SFmode)
	    emit_move_insn (op0, gen_lowpart (mode, t));
	}
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_storedquv16qi (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_insn_for_size_p ())
	    /* We will eventually emit movups based on insn attributes.  */
	    emit_insn (gen_sse2_storeupd (op0, op1));
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_insn_for_size_p ())
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_storeups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
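
/* For reference (not part of GCC), the priorities above mean, e.g., that
   a commutative (plus:SI dst otherreg dst) is swapped so that src1 matches
   dst, and (plus:SI dst (const_int 1) reg) is swapped so the immediate
   becomes the second source -- both matching x86's two-address
   "op src2, dst" instruction form.  */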
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else if (rtx_equal_p (dst, src1))
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
				     rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;

  if (GET_CODE (operands[1]) == SUBREG)
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (GET_CODE (operands[2]) == SUBREG)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
	      && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;
      switch (GET_MODE (SUBREG_REG (op1)))
	{
	case V4SFmode:
	case V8SFmode:
	case V2DFmode:
	case V4DFmode:
	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
	  if (GET_CODE (op2) == CONST_VECTOR)
	    {
	      op2 = gen_lowpart (GET_MODE (dst), op2);
	      op2 = force_reg (GET_MODE (dst), op2);
	    }
	  else
	    {
	      op2 = SUBREG_REG (operands[2]);
	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
		op2 = force_reg (GET_MODE (dst), op2);
	    }
	  op1 = SUBREG_REG (op1);
	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
	    op1 = force_reg (GET_MODE (dst), op1);
	  emit_insn (gen_rtx_SET (VOIDmode, dst,
				  gen_rtx_fmt_ee (code, GET_MODE (dst),
						  op1, op2)));
	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
	  return;
	default:
	  break;
	}
    }

  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (code, mode, operands[1],
					  operands[2])));
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx_code_label *end_label, *qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
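
/* Illustrative sketch (not part of GCC): the transformation above is the
   RTL equivalent of the following source-level dispatch; the (a | b) test
   against -0x100 checks that both values fit in 8 bits, where hardware
   division is considerably faster.  */
#if 0
#include <stdint.h>

static uint32_t
divmod_with_qimode_fast_path (uint32_t a, uint32_t b, uint32_t *rem)
{
  if (((a | b) & ~(uint32_t) 0xff) == 0)
    {
      /* Both operands fit in QImode: one divb yields the quotient in AL
	 and the remainder in AH.  */
      *rem = (uint8_t) a % (uint8_t) b;
      return (uint8_t) a / (uint8_t) b;
    }
  *rem = a % b;
  return a / b;
}
#endif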
/* Whether it is OK to emit CFI directives when emitting asm code.  */

bool
ix86_emit_cfi ()
{
  return dwarf2out_do_cfi_asm ();
}
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx_insn *insn)
{
  df_ref def;

  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
	&& !DF_REF_IS_ARTIFICIAL (def)
	&& (regno1 == DF_REF_REGNO (def)
	    || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref use;

  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Silvermont if using a 2-source or 3-source LEA for
     non-destructive destination purposes, or due to wanting
     ability to use SCALE, the use of LEA is justified.  */
  if (TARGET_SILVERMONT || TARGET_INTEL)
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  FOR_EACH_INSN_USE (use, insn)
	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  /* Check if it is OK to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
{
  rtx_insn *prev = insn;
  rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
	{
	  prev = PREV_INSN (prev);
	  continue;
	}
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
	return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
	return false;
      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r2 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, target, parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, target,
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, target, parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, target, parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      rtx tmp1;

	      /* Find better operand for SET instruction, depending
		 on which definition is farther from the insn.  */
	      if (find_nearest_reg_def (insn, regno1, regno2))
		tmp = parts.index, tmp1 = parts.base;
	      else
		tmp = parts.base, tmp1 = parts.index;

	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	      if (parts.disp && parts.disp != const0_rtx)
		ix86_emit_binop (PLUS, mode, target, parts.disp);

	      ix86_emit_binop (PLUS, mode, target, tmp1);
	      return;
	    }

	  ix86_emit_binop (PLUS, mode, target, tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
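
/* For reference (not part of GCC), a 3-component lea such as
       lea 0x4(%rbx,%rcx,4), %rax
   is rewritten by the code above into ALU instructions along the lines of
       mov %rcx, %rax
       shl $2, %rax
       add %rbx, %rax
       add $0x4, %rax
   trading one AGU operation for several ALU operations when the cost model
   says the AGU stall would be more expensive.  */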
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code,
			enum machine_mode,
			rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
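
/* Illustrative sketch (not part of the back end proper): the exponent
   bias trick above, restated in plain C.  Assumes IEEE-754 doubles and
   C99 hex-float literals; the helper name is ours, for illustration
   only.  A 64-bit pattern 0x43300000 ## LO reads back as the double
   0x1.0p52 + LO, and 0x45300000 ## HI as 0x1.0p84 + HI * 0x1.0p32, so
   subtracting the biases and summing recovers the unsigned value.  */

static double
uns64_to_double_sketch (unsigned int lo, unsigned int hi)
{
  union { unsigned long long i; double d; } lo_part, hi_part;

  lo_part.i = (0x43300000ULL << 32) | lo;	/* 0x1.0p52 + lo */
  hi_part.i = (0x45300000ULL << 32) | hi;	/* 0x1.0p84 + hi * 2^32 */

  /* Mirrors the subv2df3 of the bias vector and the horizontal add.  */
  return (lo_part.d - 0x1.0p52) + (hi_part.d - 0x1.0p84);
}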
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx, rtx)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
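
/* Illustrative sketch (ours): the halving above in plain C.  Each
   16-bit half converts to float exactly, and the multiply by 2^16 is
   exact, so the only rounding occurs in the final add -- the same
   error behavior as the emitted MULT/PLUS pair.  */

static float
uns32_to_float_sketch (unsigned int u)
{
  float lo = (float) (u & 0xffff);	/* exact */
  float hi = (float) (u >> 16);		/* exact */
  return hi * 65536.0f + lo;		/* rounds once */
}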
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
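
/* Illustrative sketch (ours, scalar rather than vector): the
   adjustment above in plain C.  Values below 0x1.0p31 truncate with
   the signed pattern directly; larger ones have 0x1.0p31 subtracted
   first, and the missing bit is restored by xoring 0x80000000 into the
   truncated result -- the role of *XORP.  */

static unsigned int
double_to_uns32_sketch (double d)
{
  if (d < 0x1.0p31)
    return (unsigned int) (int) d;		/* signed fix suffices */
  return ((unsigned int) (int) (d - 0x1.0p31)) ^ 0x80000000u;
}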
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V64QImode:
    case V32QImode:
    case V16QImode:
    case V32HImode:
    case V16HImode:
    case V8HImode:
    case V16SImode:
    case V8SImode:
    case V4SImode:
    case V8DImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V16SFmode:
    case V8SFmode:
    case V4SFmode:
    case V8DFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V16SImode:
    case V16SFmode:
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V8DImode:
    case V4DImode:
    case V2DImode:
    case V8DFmode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT) 1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT) 1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);
	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
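
/* Illustrative sketch (ours): what the sign-bit masks accomplish, in
   plain C on a scalar double.  With S the sign-bit mask, ABS is
   x & ~S (the INVERT mask) and NEG is x ^ S, performed entirely with
   integer bit operations.  Assumes IEEE-754 doubles.  */

static double
fp_absneg_sketch (double x, int do_abs)
{
  union { unsigned long long i; double d; } u;
  unsigned long long s = 1ULL << 63;	/* the sign bit */

  u.d = x;
  if (do_abs)
    u.i &= ~s;		/* ABS: clear the sign bit */
  else
    u.i ^= s;		/* NEG: flip the sign bit */
  return u.d;
}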
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
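
/* Illustrative sketch (ours): the mask decomposition the two splitters
   above implement, in plain C.  With S the sign-bit mask,
   copysign (x, y) is (x & ~S) | (y & S): the magnitude of X combined
   with the sign of Y, exactly the AND/AND-NOT/IOR sequence emitted.  */

static double
copysign_sketch (double x, double y)
{
  union { unsigned long long i; double d; } ux, uy;
  unsigned long long s = 1ULL << 63;

  ux.d = x;
  uy.d = y;
  ux.i = (ux.i & ~s) | (uy.i & s);
  return ux.d;
}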
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (enum rtx_code)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases the carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2];
	rtx_code_label *label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
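
/* Illustrative sketch (ours): the double-word lowering above in plain
   C for an unsigned 64-bit '<'.  High words decide unless equal, in
   which case the low words decide with an unsigned test, exactly as
   the comment inside ix86_expand_branch describes.  */

static int
ult64_sketch (unsigned int lo0, unsigned int hi0,
	      unsigned int lo1, unsigned int hi1)
{
  if (hi0 < hi1)
    return 1;			/* goto true */
  if (hi0 > hi1)
    return 0;			/* goto false */
  return lo0 < lo1;		/* low words, unsigned_condition */
}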
/* Split branch based on floating point condition.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx condition;
  rtx_insn *i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op;
      rtx_insn *compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut: following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 a carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
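
/* Illustrative sketch (ours): the rewrites above in plain C.  Each
   transformed test is equivalent to the original but uses only
   LTU/GEU, the forms that map directly onto the x86 carry flag.
   Returns nonzero when every identity holds for the given inputs.  */

static int
carry_flag_rewrites_sketch (unsigned int a, unsigned int b)
{
  int ok = 1;
  ok &= (a == 0) == (a < 1);			/* EQ  -> LTU */
  ok &= (a > b) == (b < a);			/* GTU -> LTU, swapped */
  ok &= ((int) a >= 0) == (a < 0x80000000u);	/* GE 0 -> LTU */
  return ok;
}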
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx_insn *compare_seq;
  rtx compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp,
					    copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
					copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out),
				       GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
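
/* Illustrative sketch (ours): the branch-free "general case" sequence
   above in plain C.  setcc yields 0/1, the decrement turns that into
   -1/0, and masking then selects between the two constants without a
   branch: cond ? ct : cf.  */

static int
int_movcc_sketch (int cond, int ct, int cf)
{
  int mask = (cond != 0) - 1;		/* setcc; decl: cond ? 0 : -1 */
  return (mask & (cf - ct)) + ct;	/* andl (cf-ct); addl ct */
}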
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
20829 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20830 rtx op_true
, rtx op_false
)
20832 enum machine_mode mode
= GET_MODE (dest
);
20833 enum machine_mode cmp_ops_mode
= GET_MODE (cmp_op0
);
20835 /* In general case result of comparison can differ from operands' type. */
20836 enum machine_mode cmp_mode
;
20838 /* In AVX512F the result of comparison is an integer mask. */
20839 bool maskcmp
= false;
20842 if (GET_MODE_SIZE (cmp_ops_mode
) == 64)
20844 cmp_mode
= mode_for_size (GET_MODE_NUNITS (cmp_ops_mode
), MODE_INT
, 0);
20845 gcc_assert (cmp_mode
!= BLKmode
);
20850 cmp_mode
= cmp_ops_mode
;
20853 cmp_op0
= force_reg (cmp_ops_mode
, cmp_op0
);
20854 if (!nonimmediate_operand (cmp_op1
, cmp_ops_mode
))
20855 cmp_op1
= force_reg (cmp_ops_mode
, cmp_op1
);
20858 || reg_overlap_mentioned_p (dest
, op_true
)
20859 || reg_overlap_mentioned_p (dest
, op_false
))
20860 dest
= gen_reg_rtx (maskcmp
? cmp_mode
: mode
);
20862 /* Compare patterns for int modes are unspec in AVX512F only. */
20863 if (maskcmp
&& (code
== GT
|| code
== EQ
))
20865 rtx (*gen
)(rtx
, rtx
, rtx
);
20867 switch (cmp_ops_mode
)
20870 gen
= code
== GT
? gen_avx512f_gtv16si3
: gen_avx512f_eqv16si3_1
;
20873 gen
= code
== GT
? gen_avx512f_gtv8di3
: gen_avx512f_eqv8di3_1
;
20881 emit_insn (gen (dest
, cmp_op0
, cmp_op1
));
20885 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20887 if (cmp_mode
!= mode
&& !maskcmp
)
20889 x
= force_reg (cmp_ops_mode
, x
);
20890 convert_move (dest
, x
, false);
20893 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmpmode = GET_MODE (cmp);

  /* In AVX512F the result of comparison is an integer mask.  */
  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);

  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode))
      && !maskcmp)
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode)
	   && !maskcmp)
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode)
	   && !maskcmp)
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
	   && !maskcmp)
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP
	   && !maskcmp)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true,
						    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
      rtx d = dest;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      if (mode != V16QImode)
		d = gen_reg_rtx (V16QImode);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      if (mode != V32QImode)
		d = gen_reg_rtx (V32QImode);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;

	case V16SImode:
	  gen = gen_avx512f_blendmv16si;
	  break;
	case V8DImode:
	  gen = gen_avx512f_blendmv8di;
	  break;
	case V8DFmode:
	  gen = gen_avx512f_blendmv8df;
	  break;
	case V16SFmode:
	  gen = gen_avx512f_blendmv16sf;
	  break;

	default:
	  break;
	}

      if (gen != NULL)
	{
	  emit_insn (gen (d, op_false, op_true, cmp));
	  if (d != dest)
	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
	}
      else
	{
	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
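
/* Illustrative sketch (ours): the AND/AND-NOT/IOR fallback above in
   plain C.  With a per-element comparison mask M of all-ones or
   all-zeros, the select is (t & m) | (f & ~m), which is what the final
   three emitted sets compute when no blend instruction is available.  */

static unsigned int
sse_movcc_sketch (unsigned int m, unsigned int t, unsigned int f)
{
  return (t & m) | (f & ~m);
}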
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }
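/* Illustrative note (editorial addition): the special case above relies
   on the shift identities for a signed element x of width N:

     x < 0 ? -1 : 0   ==   x >> (N-1)              (arithmetic shift)
     x < 0 ?  1 : 0   ==   (unsigned) x >> (N-1)   (logical shift)

   e.g. for N == 32 and x == 0x80000000, the arithmetic shift yields
   0xffffffff and the logical shift yields 1, so no vector comparison
   instruction is needed at all.  */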
  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (!(TARGET_XOP
	&& (mode == V16QImode || mode == V8HImode
	    || mode == V4SImode || mode == V2DImode)))
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V16SImode:
	    case V8DImode:
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V16SImode: gen_sub3 = gen_subv16si3; break;
		  case V8DImode: gen_sub3 = gen_subv8di3; break;
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;
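	    /* Illustrative note (editorial addition): subtracting the
	       sign-bit constant re-biases an unsigned comparison onto the
	       signed compare the hardware provides:

		 a >u b   <==>   (a - 0x80..0) >s (b - 0x80..0)

	       e.g. with 8-bit lanes, 0xff >u 0x01 becomes 0x7f >s 0x81,
	       i.e. 127 > -127 as signed values, which PCMPGT can
	       evaluate directly.  */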
	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));

	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }
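  /* Illustrative note (editorial addition): for the narrow element modes
     handled just above, the unsigned comparison is instead reduced with a
     saturating subtraction:

       a >u b   <==>   (a -us b) != 0

     where -us saturates at 0 (PSUBUSB/PSUBUSW).  Since only EQ exists for
     these widths, the code tests (a -us b) == 0 and flips NEGATE to get
     the != sense.  */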
  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      if (GET_MODE (x) == mode)
	x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
static bool
ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  switch (mode)
    {
    case V16SImode:
      emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
					       force_reg (V16SImode, mask),
					       op1));
      return true;
    case V16SFmode:
      emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
					       force_reg (V16SImode, mask),
					       op1));
      return true;
    case V8DImode:
      emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
					      force_reg (V8DImode, mask), op1));
      return true;
    case V8DFmode:
      emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
					      force_reg (V8DImode, mask), op1));
      return true;
    default:
      return false;
    }
}
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 64);

  if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
    return;

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can use
	     vpshufb; vpshufb; vpermq; vpor after preparing suitable masks.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	       mask = { A B C D }
	       t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indices by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indices:
	       t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = validize_mem (force_const_mem (maskmode, vt));
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_reg_rtx (mode);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}

      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    {
	      emit_insn (gen_avx2_permvarv8si (target, op0, mask));
	      if (target != operands[0])
		emit_move_insn (operands[0],
				gen_lowpart (GET_MODE (operands[0]), target));
	    }
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SImode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SImode);
	  mask = gen_lowpart (V4SImode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;
	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (-128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  t5 = gen_reg_rtx (V4DImode);
	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  t6 = gen_reg_rtx (V4DImode);
	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
						gen_lowpart (V32QImode, t6)));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      t7 = gen_reg_rtx (V4DImode);
	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1,
					gen_lowpart (V32QImode, t7)));
	      if (target != operands[0])
		emit_move_insn (operands[0],
				gen_lowpart (GET_MODE (operands[0]), target));
	    }
	  else
	    {
	      t4 = gen_reg_rtx (V32QImode);
	      /* Similarly to the above one_operand_shuffle code,
		 just repeated twice for each operand.  The merge_two:
		 code will merge the two results together.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
						gen_lowpart (V32QImode, t6)));
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
						gen_lowpart (V32QImode, t6)));
	      emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	      t7 = gen_reg_rtx (V4DImode);
	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      t8 = gen_reg_rtx (V4DImode);
	      emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
	      emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
	      t1 = t4;
	      t2 = t3;
	      goto merge_two;
	    }
	  return;

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }
  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }
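  /* Illustrative note (editorial addition): the two constant tables above
     expand a word-level index into the byte-level indices pshufb needs.
     For V4SImode (e == 4), a lane holding index 2 becomes 8 after the
     shift, {8,8,8,8} after the replication shuffle, and {8,9,10,11} after
     adding {0,1,2,3}, which are exactly the four byte positions of
     element 2.  */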
  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);

  if (TARGET_XOP)
    {
      if (GET_MODE (target) != V16QImode)
	target = gen_reg_rtx (V16QImode);
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
  else if (one_operand_shuffle)
    {
      if (GET_MODE (target) != V16QImode)
	target = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At that point the masking done by
	     expand_int_vcond will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      if (GET_MODE (target) != mode)
	target = gen_reg_rtx (mode);
      xops[0] = target;
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      if (target != operands[0])
	emit_move_insn (operands[0],
			gen_lowpart (GET_MODE (operands[0]), target));
    }
}
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V32HImode:
	  if (unsigned_p)
	    unpack = gen_avx512f_zero_extendv16hiv16si2;
	  else
	    unpack = gen_avx512f_sign_extendv16hiv16si2;
	  halfmode = V16HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V16SImode:
	  if (unsigned_p)
	    unpack = gen_avx512f_zero_extendv8siv8di2;
	  else
	    unpack = gen_avx512f_sign_extendv8siv8di2;
	  halfmode = V8SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) >= 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (V1TImode);
	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	  tmp = gen_lowpart (imode, tmp);
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	/* In this case, we simply unpack with zeros.  */
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	/* Otherwise, unpack with copies of the sign bit, obtained by
	   comparing 0 > src.  */
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      rtx tmp2 = gen_reg_rtx (imode);
      emit_insn (unpack (tmp2, src, tmp));
      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
    }
}
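/* Illustrative note (editorial addition): the pre-SSE4.1 path above
   synthesizes the widening with an interleave.  Zero extension unpacks
   with a zero vector:

     {a0 a1 a2 a3 ...} interleave-low {0 0 0 0 ...}  ->  {a0 0 a1 0 ...}

   which, read in the next wider mode (little endian), is the
   zero-extended {a0, a1, ...}.  For sign extension the zero vector is
   replaced by the 0 > src comparison result, i.e. all-ones exactly in
   the lanes where src is negative, so the interleave fills the high
   half of each widened element with sign-bit copies.  */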
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
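/* Illustrative note (editorial addition): the insns constructed above
   implement a conditional increment/decrement with no branch and no
   cmove.  For example,

     if (a < b) c++;      becomes      cmpl %ebx, %eax
                                       adcl $0, %ecx

   because the unsigned comparison leaves its result in the carry flag,
   which adc (or sbb for the decrement case) folds straight into the
   addition.  */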
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, which force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, &r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, which force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }
  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use the larger counterpart.  We
	     also retype memory - these come from an attempt to avoid REX
	     prefix on moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Reversed order.  */
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
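/* Illustrative note (editorial addition): on processors where an add is
   at least as cheap as a constant shift, a small left shift is emitted
   as repeated doubling, e.g. for count == 2:

     addl %eax, %eax
     addl %eax, %eax

   instead of sall $2, %eax.  The cost test above makes that trade-off
   explicit; when optimizing for size, only the count == 1 case uses the
   add form.  */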
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}
      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_int_reg_note (insn, REG_BR_PROB, prob);
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx_code_label *
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
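/* Illustrative note (editorial addition): the helper above emits the
   moral equivalent of

     if ((count & value) == 0) goto label;

   so e.g. value == 3 tests 4-byte alignment of count.  The jump is
   annotated as 50% taken in epilogues, where the remainder bits are
   essentially random, and 90% taken in prologues.  */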
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
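/* Illustrative note (editorial addition): SCALE is always a power of two
   here, so the division becomes a logical shift, e.g. converting a byte
   count into a dword count for rep movsd:

     shrl $2, %ecx

   and for a compile-time constant count the division is folded away
   entirely.  */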
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* Copy the address to a Pmode register.  This is used for x32 to
   truncate DImode TLS address to a SImode register.  */

static rtx
ix86_copy_addr_to_reg (rtx addr)
{
  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
    return copy_addr_to_reg (addr);
  else
    {
      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
      return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
    }
}
/* When ISSETMEM is FALSE, output a simple loop to move memory pointed to
   by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, overall
   size is COUNT specified in bytes.  When ISSETMEM is TRUE, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size, bool issetmem)
{
  rtx_code_label *out_label, *top_label;
  rtx iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in this case we'll need to compute
     smallest power of two, containing in PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (!issetmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode,
				    GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode,
				    GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (!issetmem)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
   When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
   When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
   ORIG_VALUE is the original value passed to memset to fill the memory with.
   Other arguments have same meaning as for previous function.  */

static void
expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
			      rtx destptr, rtx srcptr, rtx value,
			      rtx orig_value, rtx count,
			      enum machine_mode mode, bool issetmem)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If possible, it is shorter to use rep movs.
     TODO: Maybe it is better to move this logic to decide_alg.  */
  if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
      && (!issetmem || orig_value == const0_rtx))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);

  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);

  if (issetmem)
    {
      value = force_reg (mode, gen_lowpart (mode, value));
      emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
    }
  else
    {
      if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
      if (mode != QImode)
	{
	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
	}
      else
	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
      if (CONST_INT_P (count))
	{
	  rounded_count = (INTVAL (count)
			   & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
	  srcmem = shallow_copy_rtx (srcmem);
	  set_mem_size (srcmem, rounded_count);
	}
      else
	{
	  if (MEM_SIZE_KNOWN_P (srcmem))
	    clear_mem_size (srcmem);
	}
      emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			      destexp, srcexp));
    }
}
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
  code = optab_handler (mov_optab, move_mode);
  while (code == CODE_FOR_nothing && piece_size > 1)
    {
      piece_size >>= 1;
      move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
      code = optab_handler (mov_optab, move_mode);
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      move_mode = mode_for_vector (word_mode, nunits);
      code = optab_handler (mov_optab, move_mode);
      if (code == CODE_FOR_nothing)
	{
	  move_mode = word_mode;
	  piece_size = GET_MODE_SIZE (move_mode);
	  code = optab_handler (mov_optab, move_mode);
	}
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
	 a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
					  piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
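/* Illustrative note (editorial addition): the mode-selection loop above
   starts at the largest power of two not exceeding SIZE_TO_MOVE and
   halves it until the target supports a move of that width.  E.g. with
   size_to_move == 32 it first tries a 32-byte integer move; if no such
   move pattern exists, 16 bytes is tried next (possibly re-expressed as
   a word-element vector mode), and the copy is then emitted as two
   16-byte register round trips.  */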
23095 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23097 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
23098 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
23101 if (CONST_INT_P (count
))
23103 HOST_WIDE_INT countval
= INTVAL (count
);
23104 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
23107 /* For now MAX_SIZE should be a power of 2. This assert could be
23108 relaxed, but it'll require a bit more complicated epilogue
23110 gcc_assert ((max_size
& (max_size
- 1)) == 0);
23111 for (i
= max_size
; i
>= 1; i
>>= 1)
23113 if (epilogue_size
& i
)
23114 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
23120 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
23121 count
, 1, OPTAB_DIRECT
);
23122 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
23123 count
, QImode
, 1, 4, false);
  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
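
/* Editorial sketch (not part of GCC): for a constant COUNT the epilogue
   above walks the bits of COUNT % MAX_SIZE from high to low and emits one
   move per set bit, e.g. a 7-byte tail with MAX_SIZE == 8 becomes moves
   of 4, 2 and 1 bytes.  Illustrative plain C under those assumptions:  */
static inline void
epilogue_moves_model (unsigned char *dst, const unsigned char *src,
                      int countval, int max_size)
{
  int epilogue_size = countval % max_size;   /* max_size is a power of 2 */
  int i;
  for (i = max_size; i >= 1; i >>= 1)
    if (epilogue_size & i)
      {
        __builtin_memcpy (dst, src, i);      /* emit_memmov (..., i) */
        dst += i;
        src += i;
      }
}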
/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
   with value PROMOTED_VAL.
   Unlike emit_memmov there is no source to update; the return value is
   the updated DST.  */
static rtx
emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
             HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, adjust;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  move_mode = GET_MODE (promoted_val);
  if (move_mode == VOIDmode)
    move_mode = QImode;
  if (size_to_move < GET_MODE_SIZE (move_mode))
    {
      move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
      promoted_val = gen_lowpart (move_mode, promoted_val);
    }
  piece_size = GET_MODE_SIZE (move_mode);
  code = optab_handler (mov_optab, move_mode);
  gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      if (piece_size <= GET_MODE_SIZE (word_mode))
        {
          emit_insn (gen_strset (destptr, dst, promoted_val));
          dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                              piece_size);
          continue;
        }

      emit_insn (GEN_FCN (code) (dst, promoted_val));

      emit_move_insn (destptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                          piece_size);
    }

  /* Update DST rtx.  */
  return dst;
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count = expand_simple_binop (counter_mode (count), AND, count,
                               GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2, true);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
                        rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
         relaxed, but it'll require a bit more complicated epilogue
         expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
        {
          if (epilogue_size & i)
            {
              if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
                destmem = emit_memset (destmem, destptr, vec_value, i);
              else
                destmem = emit_memset (destmem, destptr, value, i);
            }
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
   DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
   Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
   ignored.
   Return value is updated DESTMEM.  */
static rtx
expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx vec_value, rtx count, int align,
                               int desired_alignment, bool issetmem)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
        {
          rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
          if (issetmem)
            {
              if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
                destmem = emit_memset (destmem, destptr, vec_value, i);
              else
                destmem = emit_memset (destmem, destptr, value, i);
            }
          else
            destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
          ix86_adjust_counter (count, i);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
        }
    }
  return destmem;
}
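
/* Editorial sketch (not part of GCC): the prologue above peels 1, 2, 4, ...
   byte operations, each guarded by an alignment test on DESTPTR, so that
   after at most DESIRED_ALIGNMENT - ALIGN bytes the destination is aligned.
   Hypothetical plain-C equivalent for the memset case:  */
static inline unsigned char *
align_dest_model (unsigned char *dst, unsigned char value,
                  int align, int desired_alignment)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    if (align <= i && ((unsigned long) dst & i))  /* ix86_expand_aligntest */
      {
        __builtin_memset (dst, value, i);         /* emit_memset (..., i) */
        dst += i;
      }
  return dst;
}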
/* Test if COUNT&SIZE is nonzero and if so, expand movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
static void
expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr,
                               rtx value, rtx vec_value,
                               rtx count, int size,
                               rtx done_label, bool issetmem)
{
  rtx_code_label *label = ix86_expand_aligntest (count, size, false);
  enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
  rtx modesize;
  int n;

  /* If we do not have vector value to copy, we must reduce size.  */
  if (issetmem)
    {
      if (!vec_value)
        {
          if (GET_MODE (value) == VOIDmode && size > 8)
            mode = Pmode;
          else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
            mode = GET_MODE (value);
        }
      else
        mode = GET_MODE (vec_value), value = vec_value;
    }
  else
    {
      /* Choose appropriate vector mode.  */
      if (size >= 32)
        mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
      else if (size >= 16)
        mode = TARGET_SSE ? V16QImode : DImode;
      srcmem = change_address (srcmem, mode, srcptr);
    }
  destmem = change_address (destmem, mode, destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  gcc_assert (GET_MODE_SIZE (mode) <= size);
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (mode, value));
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }

  destmem = offset_address (destmem, count, 1);
  destmem = offset_address (destmem, GEN_INT (-2 * size),
                            GET_MODE_SIZE (mode));
  if (!issetmem)
    {
      srcmem = offset_address (srcmem, count, 1);
      srcmem = offset_address (srcmem, GEN_INT (-2 * size),
                               GET_MODE_SIZE (mode));
    }
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (mode, value));
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }
  emit_jump_insn (gen_jump (done_label));
  emit_barrier ();

  emit_label (label);
  LABEL_NUSES (label) = 1;
}
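
/* Editorial sketch (not part of GCC): the trick above covers any length in
   [SIZE, 2*SIZE-1] with two possibly-overlapping SIZE-byte moves, one at
   the start of the block and one ending at its last byte.  Illustrative
   plain C for the memcpy case with non-overlapping DST/SRC:  */
static inline void
small_copy_model (unsigned char *dst, const unsigned char *src,
                  unsigned long count, int size)
{
  /* Assumes size <= count && count < 2 * (unsigned long) size.  */
  __builtin_memcpy (dst, src, size);
  __builtin_memcpy (dst + count - size, src + count - size, size);
}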
/* Handle small memcpy (up to SIZE, which is supposed to be a small power
   of 2) and get ready for the main memcpy loop by copying the initial
   DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
   DESTPTR/SRCPTR/COUNT in a way that lets us proceed with a loop copying
   SIZE bytes at once.  Do moves in MODE.
   DONE_LABEL is a label after the whole copying sequence.  The label is
   created on demand if *DONE_LABEL is NULL.
   MIN_SIZE is minimal size of block copied.  This value gets adjusted for
   new bounds after the initial copies.

   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates
   whether we will dispatch to a library call for large blocks.

   In pseudocode we do:

   if (COUNT < SIZE)
     {
       Assume that SIZE is 4.  Bigger sizes are handled analogously.
       if (COUNT & 4)
         {
           copy 4 bytes from SRCPTR to DESTPTR
           copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
           goto done_label
         }
       if (!COUNT)
         goto done_label;
       copy 1 byte from SRCPTR to DESTPTR
       if (COUNT & 2)
         {
           copy 2 bytes from SRCPTR to DESTPTR
           copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
         }
     }
   else
     {
       copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
       copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE

       OLD_DESTPTR = DESTPTR;
       Align DESTPTR up to DESIRED_ALIGN
       SRCPTR += DESTPTR - OLD_DESTPTR
       COUNT -= DESTPTR - OLD_DESTPTR
       if (DYNAMIC_CHECK)
         Round COUNT down to multiple of SIZE
       << optional caller supplied zero size guard is here >>
       << optional caller supplied dynamic check is here >>
       << caller supplied main copy loop is here >>
     }
   done_label:
  */
static void
expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
                                                            rtx *destptr, rtx *srcptr,
                                                            enum machine_mode mode,
                                                            rtx value, rtx vec_value,
                                                            rtx *count,
                                                            rtx_code_label **done_label,
                                                            int size,
                                                            int desired_align,
                                                            int align,
                                                            unsigned HOST_WIDE_INT *min_size,
                                                            bool dynamic_check,
                                                            bool issetmem)
{
  rtx_code_label *loop_label = NULL, *label;
  int n;
  rtx modesize;
  int prolog_size = 0;
  rtx mode_value;

  /* Choose proper value to copy.  */
  if (issetmem && VECTOR_MODE_P (mode))
    mode_value = vec_value;
  else
    mode_value = value;
  gcc_assert (GET_MODE_SIZE (mode) <= size);

  /* See if block is big or small, handle small blocks.  */
  if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
    {
      int size2 = size;
      loop_label = gen_label_rtx ();

      if (!*done_label)
        *done_label = gen_label_rtx ();

      emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
                               1, loop_label);
      size2 >>= 1;

      /* Handle sizes > 3.  */
      for (; size2 > 2; size2 >>= 1)
        expand_small_movmem_or_setmem (destmem, srcmem,
                                       *destptr, *srcptr,
                                       value, vec_value,
                                       *count,
                                       size2, *done_label, issetmem);
      /* Nothing to copy?  Jump to DONE_LABEL if so.  */
      emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
                               1, *done_label);

      /* Do a byte copy.  */
      destmem = change_address (destmem, QImode, *destptr);
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (QImode, value));
      else
        {
          srcmem = change_address (srcmem, QImode, *srcptr);
          emit_move_insn (destmem, srcmem);
        }

      /* Handle sizes 2 and 3.  */
      label = ix86_expand_aligntest (*count, 2, false);
      destmem = change_address (destmem, HImode, *destptr);
      destmem = offset_address (destmem, *count, 1);
      destmem = offset_address (destmem, GEN_INT (-2), 2);
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (HImode, value));
      else
        {
          srcmem = change_address (srcmem, HImode, *srcptr);
          srcmem = offset_address (srcmem, *count, 1);
          srcmem = offset_address (srcmem, GEN_INT (-2), 2);
          emit_move_insn (destmem, srcmem);
        }

      emit_label (label);
      LABEL_NUSES (label) = 1;
      emit_jump_insn (gen_jump (*done_label));
      emit_barrier ();
    }
  else
    gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
                || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);

  /* Start memcpy for COUNT >= SIZE.  */
  if (loop_label)
    {
      emit_label (loop_label);
      LABEL_NUSES (loop_label) = 1;
    }

  /* Copy first desired_align bytes.  */
  if (!issetmem)
    srcmem = change_address (srcmem, mode, *srcptr);
  destmem = change_address (destmem, mode, *destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  for (n = 0; prolog_size < desired_align - align; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, mode_value);
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
      prolog_size += GET_MODE_SIZE (mode);
    }

  /* Copy last SIZE bytes.  */
  destmem = offset_address (destmem, *count, 1);
  destmem = offset_address (destmem,
                            GEN_INT (-size - prolog_size),
                            1);
  if (issetmem)
    emit_move_insn (destmem, mode_value);
  else
    {
      srcmem = offset_address (srcmem, *count, 1);
      srcmem = offset_address (srcmem,
                               GEN_INT (-size - prolog_size),
                               1);
      emit_move_insn (destmem, srcmem);
    }
  for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
    {
      destmem = offset_address (destmem, modesize, 1);
      if (issetmem)
        emit_move_insn (destmem, mode_value);
      else
        {
          srcmem = offset_address (srcmem, modesize, 1);
          emit_move_insn (destmem, srcmem);
        }
    }

  /* Align destination.  */
  if (desired_align > 1 && desired_align > align)
    {
      rtx saveddest = *destptr;

      gcc_assert (desired_align <= size);
      /* Align destptr up, place it to new register.  */
      *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
                                      GEN_INT (prolog_size),
                                      NULL_RTX, 1, OPTAB_DIRECT);
      *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
                                      GEN_INT (-desired_align),
                                      *destptr, 1, OPTAB_DIRECT);
      /* See how many bytes we skipped.  */
      saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
                                       *destptr,
                                       saveddest, 1, OPTAB_DIRECT);
      /* Adjust srcptr and count.  */
      if (!issetmem)
        *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
                                       *srcptr, 1, OPTAB_DIRECT);
      *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
                                    saveddest, *count, 1, OPTAB_DIRECT);
      /* We copied at most size + prolog_size.  */
      if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
        *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
      else
        *min_size = 0;

      /* Our loops always round down the block size, but for dispatch to library
         we need precise value.  */
      if (dynamic_check)
        *count = expand_simple_binop (GET_MODE (*count), AND, *count,
                                      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
    }
  else
    {
      gcc_assert (prolog_size == 0);
      /* Decrease count, so we won't end up copying last word twice.  */
      if (!CONST_INT_P (*count))
        *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
                                      constm1_rtx, *count, 1, OPTAB_DIRECT);
      else
        *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
      if (*min_size)
        *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
    }
}
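
/* Editorial sketch (not part of GCC): the destination-alignment step above
   boils down to the pointer arithmetic below, where PROLOG_SIZE bytes have
   already been copied and DESIRED_ALIGN is a power of two.  Names are
   illustrative only.  */
static inline void
round_pointers_model (unsigned char **destptr, unsigned char **srcptr,
                      unsigned long *count, int prolog_size, int desired_align)
{
  unsigned char *saveddest = *destptr;

  /* Align destptr up: (destptr + prolog_size) & -desired_align.  */
  *destptr = (unsigned char *)
    (((unsigned long) *destptr + prolog_size)
     & -(unsigned long) desired_align);
  /* See how many bytes we skipped and adjust srcptr and count.  */
  *srcptr += *destptr - saveddest;
  *count -= *destptr - saveddest;
}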
/* This function is like the previous one, except here we know how many bytes
   need to be copied.  That allows us to update alignment not only of DST, which
   is returned, but also of SRC, which is passed as a pointer for that
   reason.  */
static rtx
expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
                                        rtx srcreg, rtx value, rtx vec_value,
                                        int desired_align, int align_bytes,
                                        bool issetmem)
{
  rtx src = NULL;
  rtx orig_dst = dst;
  rtx orig_src = NULL;
  int piece_size = 1;
  int copied_bytes = 0;

  if (!issetmem)
    {
      gcc_assert (srcp != NULL);
      src = *srcp;
      orig_src = src;
    }

  for (piece_size = 1;
       piece_size <= desired_align && copied_bytes < align_bytes;
       piece_size <<= 1)
    {
      if (align_bytes & piece_size)
        {
          if (issetmem)
            {
              if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
                dst = emit_memset (dst, destreg, vec_value, piece_size);
              else
                dst = emit_memset (dst, destreg, value, piece_size);
            }
          else
            dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
          copied_bytes += piece_size;
        }
    }
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);

  if (!issetmem)
    {
      int src_align_bytes = get_mem_align_offset (src, desired_align
                                                       * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        src_align_bytes = desired_align - src_align_bytes;
      if (src_align_bytes >= 0)
        {
          unsigned int src_align;
          for (src_align = desired_align; src_align >= 2; src_align >>= 1)
            {
              if ((src_align_bytes & (src_align - 1))
                  == (align_bytes & (src_align - 1)))
                break;
            }
          if (src_align > (unsigned int) desired_align)
            src_align = desired_align;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      if (MEM_SIZE_KNOWN_P (orig_src))
        set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
      *srcp = src;
    }

  return dst;
}
/* Return true if ALG can be used in current context.
   Assume we expand memset if MEMSET is true.  */
static bool
alg_usable_p (enum stringop_alg alg, bool memset)
{
  if (alg == no_stringop)
    return false;
  if (alg == vector_loop)
    return TARGET_SSE || TARGET_AVX;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  if (alg == rep_prefix_1_byte
      || alg == rep_prefix_4_byte
      || alg == rep_prefix_8_byte)
    return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
             || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
  return true;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
            unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
            bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
{
  const struct stringop_algs *algs;
  bool optimize_for_speed;
  int max = 0;
  const struct processor_costs *cost;
  int i;
  bool any_alg_usable_p = false;

  *noalign = false;
  *dynamic_check = -1;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && (max_size < 256
              || (expected_size != -1 && expected_size < 256))))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];

  /* See maximal size for user defined algorithm.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    {
      enum stringop_alg candidate = algs->size[i].alg;
      bool usable = alg_usable_p (candidate, memset);
      any_alg_usable_p |= usable;

      if (candidate != libcall && candidate && usable)
        max = algs->size[i].max;
    }

  /* If expected size is not known but max size is small enough
     so inline version is a win, set expected size into
     the range.  */
  if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
      && expected_size == -1)
    expected_size = min_size / 2 + max_size / 2;

  /* If user specified the algorithm, honor it if possible.  */
  if (ix86_stringop_alg != no_stringop
      && alg_usable_p (ix86_stringop_alg, memset))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      *noalign = true;
      if (!count || (count & 3) || (memset && !zero_memset))
        return alg_usable_p (rep_prefix_1_byte, memset)
               ? rep_prefix_1_byte : loop_1_byte;
      else
        return alg_usable_p (rep_prefix_4_byte, memset)
               ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      enum stringop_alg alg = libcall;
      bool alg_noalign = false;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && alg_usable_p (candidate, memset))
                {
                  alg = candidate;
                  alg_noalign = algs->size[i].noalign;
                }
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    {
                      *noalign = alg_noalign;
                      return alg;
                    }
                  break;
                }
              else if (alg_usable_p (candidate, memset))
                {
                  *noalign = algs->size[i].noalign;
                  return candidate;
                }
            }
        }
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !alg_usable_p (algs->unknown_size, memset)))
    {
      enum stringop_alg alg;

      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max <= 0)
        max = 4096;
      alg = decide_alg (count, max / 2, min_size, max_size, memset,
                        zero_memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return (alg_usable_p (algs->unknown_size, memset)
          ? algs->unknown_size : libcall);
}
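
/* Editorial sketch (not part of GCC): the expected-size dispatch above is a
   first-fit scan of the cost table, picking the first usable non-libcall
   entry whose MAX bound covers the expected size.  The parallel arrays
   below stand in for algs->size[i].alg/.max; -1 models "no bound" and the
   -1 return models falling back to a library call.  */
static inline int
pick_first_fit_model (const int alg_of[], const int max_of[], int n,
                      int expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    {
      if (max_of[i] == 0)
        break;                  /* rest of the table is unusable */
      if (max_of[i] >= expected_size || max_of[i] == -1)
        return alg_of[i];       /* first entry covering the size wins */
    }
  return -1;                    /* fall back to a library call */
}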
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size,
                  enum machine_mode move_mode)
{
  int desired_align = 0;

  gcc_assert (alg != no_stringop);

  if (alg == libcall)
    return 0;
  if (move_mode == VOIDmode)
    return 0;

  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying whole cacheline at once.  */
  if (TARGET_PENTIUMPRO
      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
    desired_align = 8;

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;

  return desired_align;
}
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, CONST0_RTX (mode));
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;

  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        {
          if (mode == SImode)
            emit_insn (gen_movsi_insv_1 (reg, reg));
          else
            emit_insn (gen_movdi_insv_1 (reg, reg));
        }
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
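
/* Editorial sketch (not part of GCC): both strategies above compute the
   same broadcast.  For example 0x5A * 0x01010101 == 0x5A5A5A5A, and the
   shift/or unwinding reaches the same value:  */
static inline unsigned int
broadcast_byte_model (unsigned char val)
{
  unsigned int v = val;
  v |= v << 8;                  /* 0x00005A5A */
  v |= v << 16;                 /* 0x5A5A5A5A */
  return v;                     /* == (unsigned int) val * 0x01010101 */
}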
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
   operations when profitable.  The code depends upon architecture, block size
   and alignment, but always has one of the following overall structures:

   Aligned move sequence:

     1) Prologue guard: Conditional that jumps up to epilogues for small
        blocks that can be handled by epilogue alone.  This is faster
        but also needed for correctness, since the prologue assumes the block
        is larger than the desired alignment.

        Optional dynamic check for size and libcall for large
        blocks is emitted here too, with -minline-stringops-dynamically.

     2) Prologue: copy first few bytes in order to get destination
        aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
        than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
        copied.  We emit either a jump tree on power of two sized
        blocks, or a byte loop.

     3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
        with specified algorithm.

     4) Epilogue: code copying tail of the block that is too small to be
        handled by main body (or up to size guarded by prologue guard).

   Misaligned move sequence:

     1) Misaligned move prologue/epilogue containing:
        a) Prologue handling small memory blocks and jumping to done_label
           (skipped if blocks are known to be large enough)
        b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
           needed by single possibly misaligned move
           (skipped if alignment is not needed)
        c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves

     2) Zero size guard dispatching to done_label, if needed

     3) Dispatch to library call, if needed

     4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
        with specified algorithm.  */
bool
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
                           rtx align_exp, rtx expected_align_exp,
                           rtx expected_size_exp, rtx min_size_exp,
                           rtx max_size_exp, rtx probable_max_size_exp,
                           bool issetmem)
{
  rtx destreg;
  rtx srcreg = NULL;
  rtx_code_label *label = NULL;
  rtx tmp;
  rtx_code_label *jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  rtx vec_promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor = 1;
  /* TODO: Once value ranges are available, fill in proper data.  */
  unsigned HOST_WIDE_INT min_size = 0;
  unsigned HOST_WIDE_INT max_size = -1;
  unsigned HOST_WIDE_INT probable_max_size = -1;
  bool misaligned_prologue_used = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (!issetmem
           && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    {
      min_size = max_size = probable_max_size = count = expected_size
        = INTVAL (count_exp);
      /* When COUNT is 0, there is nothing to do.  */
      if (!count)
        return true;
    }
  else
    {
      if (min_size_exp)
        min_size = INTVAL (min_size_exp);
      if (max_size_exp)
        max_size = INTVAL (max_size_exp);
      if (probable_max_size_exp)
        probable_max_size = INTVAL (probable_max_size_exp);
      if (CONST_INT_P (expected_size_exp))
        expected_size = INTVAL (expected_size_exp);
    }

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */
  alg = decide_alg (count, expected_size, min_size, probable_max_size,
                    issetmem,
                    issetmem && val_exp == const0_rtx,
                    &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  /* For now vector-version of memset is generated only for memory zeroing, as
     creating of promoted vector value is very cheap in this case.  */
  if (issetmem && alg == vector_loop && val_exp != const0_rtx)
    alg = unrolled_loop;

  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
  if (!issetmem)
    srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));

  unroll_factor = 1;
  move_mode = word_mode;
  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = (TARGET_64BIT ? 4 : 2);
      break;
    case vector_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      /* Find the widest supported mode.  */
      move_mode = word_mode;
      while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
             != CODE_FOR_nothing)
        move_mode = GET_MODE_WIDER_MODE (move_mode);

      /* Find the corresponding vector mode with the same size as MOVE_MODE.
         MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
        {
          int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
          move_mode = mode_for_vector (word_mode, nunits);
          if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
            move_mode = word_mode;
        }
      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;
  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Misaligned move sequences handle both prologue and epilogue at once.
     Default code generation results in smaller code for large alignments
     and also avoids redundant work when sizes are known precisely.  */
  misaligned_prologue_used
    = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
       && MAX (desired_align, epilogue_size_needed) <= 32
       && desired_align <= epilogue_size_needed
       && ((desired_align > align && !align_bytes)
           || (!count && epilogue_size_needed > 1)));

  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     front of all code).
     For now the misaligned move sequences do not have fast path
     without broadcasting.  */
  if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
    {
      if (alg == vector_loop)
        {
          gcc_assert (val_exp == const0_rtx);
          vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
          promoted_val = promote_duplicated_reg_to_size (val_exp,
                                                         GET_MODE_SIZE (word_mode),
                                                         desired_align, align);
        }
      else
        {
          promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                         desired_align, align);
        }
    }
  /* As noted above, the misaligned move sequences handle both prologue and
     epilogue at once.  */
  if (misaligned_prologue_used)
    {
      /* Misaligned move prologue handled small blocks by itself.  */
      expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
           (dst, src, &destreg, &srcreg,
            move_mode, promoted_val, vec_promoted_val,
            &count_exp,
            &jump_around_label,
            desired_align < align
            ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
            desired_align, align, &min_size, dynamic_check, issetmem);
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
      set_mem_align (dst, desired_align * BITS_PER_UNIT);
      epilogue_size_needed = 0;
      if (need_zero_guard && !min_size)
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (jump_around_label == NULL_RTX)
            jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, jump_around_label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  /* Ensure that alignment prologue won't copy past end of block.  */
  else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (issetmem && epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
          || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
          /* If main algorithm works on QImode, no epilogue is needed.
             For small sizes just don't align anything.  */
          if (size_needed == 1)
            desired_align = align;
          else
            goto epilogue;
        }
      else if (!count
               && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (!issetmem && CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx_code_label *hot_label = gen_label_rtx ();
          if (jump_around_label == NULL_RTX)
            jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, counter_mode (count_exp),
                                   1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          if (issetmem)
            set_storage_via_libcall (dst, count_exp, val_exp, false);
          else
            emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }
  /* Step 2: Alignment prologue.  */
  /* Do the expensive promotion once we branched off the small blocks.  */
  if (issetmem && !promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);

  if (desired_align > align && !misaligned_prologue_used)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in prologue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          if (!issetmem)
            src = change_address (src, BLKmode, srcreg);
          dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
                                               promoted_val, vec_promoted_val,
                                               count_exp, align, desired_align,
                                               issetmem);
          /* At most desired_align - align bytes are copied.  */
          if (min_size < (unsigned)(desired_align - align))
            min_size = 0;
          else
            min_size -= desired_align - align;
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
                                                        srcreg,
                                                        promoted_val,
                                                        vec_promoted_val,
                                                        desired_align,
                                                        align_bytes,
                                                        issetmem);

          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
          min_size -= align_bytes;
          max_size -= align_bytes;
        }
      if (need_zero_guard
          && !min_size
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
      if (issetmem)
        promoted_val = val_exp;
    }
  else if (label == NULL_RTX && !misaligned_prologue_used)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size, issetmem);
      break;
    case vector_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
                                     vec_promoted_val, count_exp, move_mode,
                                     unroll_factor, expected_size, issetmem);
      break;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
                                    val_exp, count_exp, move_mode, issetmem);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      if (!issetmem)
        src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                            (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        {
          if (issetmem)
            expand_setmem_epilogue (dst, destreg, promoted_val,
                                    vec_promoted_val, count_exp,
                                    epilogue_size_needed);
          else
            expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                                    epilogue_size_needed);
        }
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
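
/* Editorial sketch (not part of GCC): the compensation in step 4 masks the
   count down to the bytes the main loop left behind.  With SIZE_NEEDED a
   power of two, e.g. COUNT == 37 and SIZE_NEEDED == 16, the loop covers 32
   bytes and the epilogue must handle 37 & 15 == 5:  */
static inline unsigned long
epilogue_count_model (unsigned long count, unsigned long size_needed)
{
  return count & (size_needed - 1);  /* expand_simple_binop (..., AND, ...) */
}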
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx_code_label *align_2_label = NULL;
  rtx_code_label *align_3_label = NULL;
  rtx_code_label *align_4_label = gen_label_rtx ();
  rtx_code_label *end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx_code_label *end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
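
/* Editorial sketch (not part of GCC): the zero-detection formula used above,
   applied to a host integer.  (x - 0x01010101) borrows out of every byte
   that was zero, and the & ~x & 0x80808080 keeps only borrows that did not
   come from a high bit already set in x, so the test is exact:  */
static inline int
has_zero_byte_model (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}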
/* Expand strlen.  */
bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  rtx vec[3];
  rtx use = NULL, call;
  unsigned int vec_len = 0;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic
          && (!TARGET_64BIT
              || (ix86_cmodel == CM_LARGE_PIC
                  && DEFAULT_ABI != MS_ABI))
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && !TARGET_PECOFF
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      int const cregs_size
        = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
      int i;

      for (i = 0; i < cregs_size; i++)
        {
          int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
          enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;

          clobber_reg (&use, gen_rtx_REG (mode, regno));
        }
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Output the assembly for a call instruction.  */
const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_cleared_alloc<machine_function> ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Check whether x86 address PARTS is a pc-relative address.  */

static bool
rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) == LABEL_REF
              || (GET_CODE (symbol) == SYMBOL_REF
                  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
              || (GET_CODE (symbol) == UNSPEC
                  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
                      || XINT (symbol, 1) == UNSPEC_PCREL
                      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
            return true;
        }
    }
  return false;
}
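/* Illustrative examples: "foo(%rip)" and "foo+8(%rip)" decompose to a
   lone SYMBOL_REF (plus offset) displacement and are pc-relative, as are
   @GOTPCREL references; "foo(%rax)" is not, since it has a base
   register.  */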
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;

  /* If this is not an LEA instruction, add the length of the addr32
     prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && GET_CODE (base) == SUBREG)
    base = SUBREG_REG (base);
  if (index && GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT && !rip_relative_addr_p (&parts))
        len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
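/* Worked examples (illustrative, 32-bit mode, default segment):
     (%eax)          -> 0  (mod=00 r/m=reg, no SIB, no disp)
     (%esp)          -> 1  (esp as base forces a SIB byte)
     4(%ebp)         -> 1  (ebp as base forces a disp8)
     foo             -> 4  (disp32 only)
     4(%eax,%ebx,2)  -> 2  (SIB byte plus disp8)  */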
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0), false);
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   the 2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
                              bool has_vex_w)
{
  int i;

  /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
     bit requires the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
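/* E.g. "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte VEX prefix, giving
   the default 2 + 1 = 3, while "vaddps (%r8), %xmm2, %xmm3" mentions an
   extended register in a memory operand (VEX.B) and so needs the 3-byte
   form, 3 + 1 = 4.  (Illustrative examples.)  */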
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_BONNELL:
    case PROCESSOR_SILVERMONT:
    case PROCESSOR_INTEL:
    case PROCESSOR_BTVER2:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_NOCONA:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_ATHLON:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_GENERIC:
    case PROCESSOR_BTVER1:
      return 3;

    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
      return 4;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn,
                      enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
/* Helper function for exact_store_load_dependency.
   Return true if ADDR is found in INSN.  */

static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}
/* Return true if there exists exact dependency for store & load, i.e.
   the same memory address is used in them.  */

static bool
exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}
static int
ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before
         the instruction is really finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_GENERIC:
      /* The stack engine allows push&pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;
      /* FALLTHRU */

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
      /* The stack engine allows push&pop instructions to execute in
         parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          if (cost >= 4)
            cost -= 3;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SILVERMONT:
    case PROCESSOR_INTEL:
      if (!reload_completed)
        return cost;

      /* Increase cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase cost of ld/st for short int types only
                     because of store forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase cost of store/load insn if exact
                         dependence exists and it is load insn.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}
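/* For example (illustrative): on Pentium a load whose address register
   is written by the immediately preceding insn pays the one-cycle AGI
   penalty modelled above, while on Core-class CPUs a dependent push/pop
   pair costs nothing thanks to the stack engine.  */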
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 1;

    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
      /* We use lookahead value 4 for BD both before and after reload
         schedules.  Plan is to have value 8 included for O3.  */
      return 4;

    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_BONNELL:
    case PROCESSOR_SILVERMONT:
    case PROCESSOR_INTEL:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as many instructions can be executed on a cycle, i.e.,
         issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      if (reload_completed)
        return ix86_issue_rate ();
      /* Don't use lookahead for pre-reload schedule to save compile time.  */
      return 0;

    default:
      return 0;
    }
}
/* Return true if target platform supports macro-fusion.  */

static bool
ix86_macro_fusion_p ()
{
  return TARGET_FUSE_CMP_AND_BRANCH;
}
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to
   "Intel Architectures Optimization Reference Manual".  */

static bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
  rtx src, dest;
  enum rtx_code ccode;
  rtx compare_set = NULL_RTX, test_if, cond;
  rtx alu_set = NULL_RTX, addr = NULL_RTX;

  if (!any_condjump_p (condjmp))
    return false;

  if (get_attr_type (condgen) != TYPE_TEST
      && get_attr_type (condgen) != TYPE_ICMP
      && get_attr_type (condgen) != TYPE_INCDEC
      && get_attr_type (condgen) != TYPE_ALU)
    return false;

  compare_set = single_set (condgen);
  if (compare_set == NULL_RTX
      && !TARGET_FUSE_ALU_AND_BRANCH)
    return false;

  if (compare_set == NULL_RTX)
    {
      int i;
      rtx pat = PATTERN (condgen);
      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
          {
            rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
            if (GET_CODE (set_src) == COMPARE)
              compare_set = XVECEXP (pat, 0, i);
            else
              alu_set = XVECEXP (pat, 0, i);
          }
    }
  if (compare_set == NULL_RTX)
    return false;
  src = SET_SRC (compare_set);
  if (GET_CODE (src) != COMPARE)
    return false;

  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     supported.  */
  if ((MEM_P (XEXP (src, 0))
       && CONST_INT_P (XEXP (src, 1)))
      || (MEM_P (XEXP (src, 1))
          && CONST_INT_P (XEXP (src, 0))))
    return false;

  /* No fusion for RIP-relative address.  */
  if (MEM_P (XEXP (src, 0)))
    addr = XEXP (XEXP (src, 0), 0);
  else if (MEM_P (XEXP (src, 1)))
    addr = XEXP (XEXP (src, 1), 0);

  if (addr)
    {
      ix86_address parts;
      int ok = ix86_decompose_address (addr, &parts);
      gcc_assert (ok);

      if (rip_relative_addr_p (&parts))
        return false;
    }

  test_if = SET_SRC (pc_set (condjmp));
  cond = XEXP (test_if, 0);
  ccode = GET_CODE (cond);
  /* Check whether the conditional jump uses Sign or Overflow Flags.  */
  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
      && (ccode == GE
          || ccode == GT
          || ccode == LE
          || ccode == LT))
    return false;

  /* Return true for TYPE_TEST and TYPE_ICMP.  */
  if (get_attr_type (condgen) == TYPE_TEST
      || get_attr_type (condgen) == TYPE_ICMP)
    return true;

  /* The following handles macro-fusion for alu + jmp.  */
  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
    return false;

  /* No fusion for alu op with memory destination operand.  */
  dest = SET_DEST (alu_set);
  if (MEM_P (dest))
    return false;

  /* Macro-fusion for inc/dec + unsigned conditional jump is not
     supported.  */
  if (get_attr_type (condgen) == TYPE_INCDEC
      && (ccode == GEU
          || ccode == GTU
          || ccode == LEU
          || ccode == LTU))
    return false;

  return true;
}
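/* For example (illustrative): "cmpl %eax, %ebx" followed by "jne .L2"
   is a fusible ICMP + jcc pair, whereas "cmpl $1, (%rax)" (MEM-IMM) or a
   compare against a RIP-relative operand is rejected above.  */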
/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there exists exactly one producer of an independent IMUL instruction
       in the ready list.
   Return the index of the IMUL producer if it was found and -1 otherwise.  */

static int
do_reorder_for_imul (rtx_insn **ready, int n_ready)
{
  rtx_insn *insn;
  rtx set, insn1, insn2;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;
  int i;

  if (!TARGET_BONNELL)
    return index;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  set = single_set (insn);
  if (!set)
    return index;
  if (!(GET_CODE (SET_SRC (set)) == MULT
      && GET_MODE (SET_SRC (set)) == SImode))
    return index;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
        continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
        insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
          && GET_CODE (SET_SRC (insn2)) == MULT
          && GET_MODE (SET_SRC (insn2)) == SImode)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
        {
          rtx_insn *con;
          con = DEP_CON (dep);
          if (!NONDEBUG_INSN_P (con))
            continue;
          insn1 = PATTERN (con);
          if (GET_CODE (insn1) == PARALLEL)
            insn1 = XVECEXP (insn1, 0, 0);

          if (GET_CODE (insn1) == SET
              && GET_CODE (SET_SRC (insn1)) == MULT
              && GET_MODE (SET_SRC (insn1)) == SImode)
            {
              sd_iterator_def sd_it1;
              dep_t dep1;
              /* Check if there is no other dependee for IMUL.  */
              index = i;
              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
                {
                  rtx_insn *pro;
                  pro = DEP_PRO (dep1);
                  if (!NONDEBUG_INSN_P (pro))
                    continue;
                  if (pro != insn)
                    index = -1;
                }
              if (index >= 0)
                break;
            }
        }
      if (index >= 0)
        break;
    }
  return index;
}
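/* Illustrative scenario: with IMUL I1 on top of the ready list and a
   single ADD that is the sole producer feeding an independent IMUL I2
   further down, the ADD's index is returned so the caller can hoist it
   and keep the pipelined IMUL unit busy.  */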
/* Try to find the best candidate on the top of the ready list if two insns
   have the same priority - the candidate is best if its dependees were
   scheduled earlier.  Applied for Silvermont only.
   Return true if the top 2 insns must be interchanged.  */

static bool
swap_top_of_ready_list (rtx_insn **ready, int n_ready)
{
  rtx_insn *top = ready[n_ready - 1];
  rtx_insn *next = ready[n_ready - 2];
  rtx set;
  sd_iterator_def sd_it;
  dep_t dep;
  int clock1 = -1;
  int clock2 = -1;
  #define INSN_TICK(INSN) (HID (INSN)->tick)

  if (!TARGET_SILVERMONT && !TARGET_INTEL)
    return false;

  if (!NONDEBUG_INSN_P (top))
    return false;
  if (!NONJUMP_INSN_P (top))
    return false;
  if (!NONDEBUG_INSN_P (next))
    return false;
  if (!NONJUMP_INSN_P (next))
    return false;
  set = single_set (top);
  if (!set)
    return false;
  set = single_set (next);
  if (!set)
    return false;

  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
    {
      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
        return false;
      /* Determine the winner more precisely.  */
      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx_insn *pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock1)
            clock1 = INSN_TICK (pro);
        }
      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx_insn *pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock2)
            clock2 = INSN_TICK (pro);
        }

      if (clock1 == clock2)
        {
          /* Determine winner - load must win.  */
          enum attr_memory memory1, memory2;
          memory1 = get_attr_memory (top);
          memory2 = get_attr_memory (next);
          if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
            return true;
        }
      return (bool) (clock2 < clock1);
    }
  return false;
  #undef INSN_TICK
}
/* Perform possible reordering of the ready list for Atom/Silvermont only.
   Return issue rate.  */

static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
                    int *pn_ready, int clock_var)
{
  int issue_rate = -1;
  int n_ready = *pn_ready;
  int index = -1;
  int i;
  rtx_insn *insn;

  /* Set up issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for BONNELL/SILVERMONT only.  */
  if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
    return issue_rate;

  /* Nothing to do if ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Do reordering for post-reload scheduler only.  */
  if (!reload_completed)
    return issue_rate;

  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
                 INSN_UID (ready[index]));

      /* Put IMUL producer (ready[index]) at the top of ready list.  */
      insn = ready[index];
      for (i = index; i < n_ready - 1; i++)
        ready[i] = ready[i + 1];
      ready[n_ready - 1] = insn;
      return issue_rate;
    }

  if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
                 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
      /* Swap 2 top elements of ready list.  */
      insn = ready[n_ready - 1];
      ready[n_ready - 1] = ready[n_ready - 2];
      ready[n_ready - 2] = insn;
    }
  return issue_rate;
}
static bool ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the lhs of INSN is a HW function argument register, and
   set IS_SPILLED to true if it is a likely spilled HW register.  */

static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
        *is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments when
   there is a move to a likely spilled HW register.  Return the first
   argument if at least one dependence was added, or NULL otherwise.  */

static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find the argument passing instruction nearest to the call.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add an output dependence between two function arguments if the
             chain of output arguments contains likely spilled HW
             registers.  */
          if (is_spilled)
            add_dependence (first_arg, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
   code motion.  */

static void
avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.  */

static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
        {
          rtx set = single_set (insn);
          if (set)
            {
              avoid_func_arg_motion (arg, insn);
              return;
            }
        }
      if (insn == BB_HEAD (bb))
        return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */

static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add a dependee for the first argument to predecessors, but
               only if the region contains more than one block.  */
            basic_block bb = BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;

                /* Regions are SCCs with the exception of selective
                   scheduling with pipelining of outer blocks enabled.
                   So also check that immediate predecessors of a non-head
                   block are in the same region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating loop-carried dependencies by using
                       the topological ordering in the region.  */
                    if (rgn == CONTAINING_RGN (e->src->index)
                        && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */

static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
          && HARD_REGISTER_P (tmp)
          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
        return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
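/* For example (illustrative): after issuing a 7-byte and a 6-byte insn
   into a 16-byte ifetch block, only 3 bytes remain, so any 4-byte insn
   is masked out for the rest of the cycle even if the DFA would accept
   it.  */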
typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */

static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */

static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx_insn *);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to the decoder.  */

static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx_insn *insn;
      int insn_size;

      if (ready_try[n_ready])
        continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
          (!first_cycle_insn_p
           && insn_size > core2i7_secondary_decoder_max_insn_size)
          /* ... or it would not fit into the ifetch block ...  */
          || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
          /* ... or the decoder is full already ...  */
          || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
        /* ... mask the insn out.  */
        {
          ready_try[n_ready] = 1;

          if (data->ready_try_change)
            bitmap_set_bit (data->ready_try_change, n_ready);
        }
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */

static void
core2i7_first_cycle_multipass_begin (void *_data,
                                     signed char *ready_try, int n_ready,
                                     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */

static void
core2i7_first_cycle_multipass_issue (void *_data,
                                     signed char *ready_try, int n_ready,
                                     rtx_insn *insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
              && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
                                               n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  bitmap_clear (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  false);
}
/* Revert the effect on ready_try.  */

static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
                                         signed char *ready_try,
                                         int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */

static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */

static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */

static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
      /* Do not perform multipass scheduling for pre-reload schedule
         to save compile time.  */
      if (reload_completed)
        {
          targetm.sched.dfa_post_advance_cycle
            = core2i7_dfa_post_advance_cycle;
          targetm.sched.first_cycle_multipass_init
            = core2i7_first_cycle_multipass_init;
          targetm.sched.first_cycle_multipass_begin
            = core2i7_first_cycle_multipass_begin;
          targetm.sched.first_cycle_multipass_issue
            = core2i7_first_cycle_multipass_issue;
          targetm.sched.first_cycle_multipass_backtrack
            = core2i7_first_cycle_multipass_backtrack;
          targetm.sched.first_cycle_multipass_end
            = core2i7_first_cycle_multipass_end;
          targetm.sched.first_cycle_multipass_fini
            = core2i7_first_cycle_multipass_fini;

          /* Set decoder parameters.  */
          core2i7_secondary_decoder_max_insn_size = 8;
          core2i7_ifetch_block_size = 16;
          core2i7_ifetch_block_max_insns = 6;
          break;
        }
      /* ... Fall through ...  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  int max_align_compat
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
          && align < max_align_compat)
        align = max_align_compat;
      if (wi::geu_p (TYPE_SIZE (type), max_align)
          && align < max_align)
        align = max_align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && wi::geu_p (TYPE_SIZE (type), 128)
          && align < 128)
        return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
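/* For example (illustrative): with a 64-byte prefetch block, a 1024-byte
   global aggregate is raised to 512-bit (cache line) alignment, and on
   x86-64 a 16-byte array is raised to 128 bits per the psABI rule
   above.  */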
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
                      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled, and
     functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun))
    {
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && wi::geu_p (TYPE_SIZE (type), 16)
          && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
                        unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        {
          /* Fastcall functions use ecx/edx for arguments, which leaves
             us with EAX for the static chain.
             Thiscall functions use ecx for arguments, which also
             leaves us with EAX for the static chain.  */
          regno = AX_REG;
        }
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          /* Thiscall functions use ecx for arguments, which leaves
             us with EAX and EDX for the static chain.
             For ABI compatibility we use EAX.  */
          regno = AX_REG;
        }
      else if (ix86_function_regparm (fntype, fndecl) == 3)
        {
          /* For regparm 3, we have no free call-clobbered registers in
             which to store the static chain.  In order to implement this,
             we have the trampoline push the static chain to the stack.
             However, we can't push a value below the return address when
             we call the nested function directly, so we have to use an
             alternate entry point.  For this we use ESI, and have the
             alternate entry point push ESI, so that things appear the
             same once we're executing the nested function.  */
          if (incoming_p)
            {
              if (fndecl == current_function_decl)
                ix86_static_chain_on_stack = true;
              return gen_frame_mem (SImode,
                                    plus_constant (Pmode,
                                                   arg_pointer_rtx, -8));
            }
          regno = SI_REG;
        }
    }

  return gen_rtx_REG (Pmode, regno);
}
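/* In short (illustrative summary): 64-bit always uses %r10; 32-bit uses
   %ecx by default, %eax for fastcall/thiscall, and for regparm(3) the
   chain travels on the stack via the %esi alternate entry point
   described above.  */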
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but kernel does not use trampolines at
         the moment.  FNADDR is a 32bit address and may not be in
         DImode when ptr_mode == SImode.  Always use movl in this
         case.  */
      if (ptr_mode == SImode
          || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_addr_to_reg (fnaddr);

          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

          mem = adjust_address (m_tramp, SImode, offset + 2);
          emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

          mem = adjust_address (m_tramp, DImode, offset + 2);
          emit_move_insn (mem, fnaddr);

          offset += 10;
        }

      /* Load static chain using movabs to r10.  Use the shorter movl
         instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
        }
      else
        {
          opcode = 0xba49;
          size = 10;
        }

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
         pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
         with a constant, or push the constant to the stack.  All of the
         instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
        {
          switch (REGNO (chain))
            {
            case AX_REG:
              opcode = 0xb8; break;
            case CX_REG:
              opcode = 0xb9; break;
            default:
              gcc_unreachable ();
            }
        }
      else
        opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
         In the case in which the trampoline stores the static chain on
         the stack, we need to skip the first insn which pushes the
         (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
                           plus_constant (Pmode, XEXP (m_tramp, 0),
                                          offset - (MEM_P (chain) ? 1 : 0)),
                           NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
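/* Byte-for-byte, the 64-bit trampoline emitted above (movabs form) is:
       49 bb <imm64>    movabs $fnaddr, %r11
       49 ba <imm64>    movabs $chain,  %r10
       49 ff e3         jmpq   *%r11
       90               nop (pads the final 32-bit store)
   (Illustrative layout.)  */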
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
        quals = TYPE_UNQUALIFIED;
      else
        quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
        itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
27139 /* Codes for all the SSE/MMX builtins. */
27142 IX86_BUILTIN_ADDPS
,
27143 IX86_BUILTIN_ADDSS
,
27144 IX86_BUILTIN_DIVPS
,
27145 IX86_BUILTIN_DIVSS
,
27146 IX86_BUILTIN_MULPS
,
27147 IX86_BUILTIN_MULSS
,
27148 IX86_BUILTIN_SUBPS
,
27149 IX86_BUILTIN_SUBSS
,
27151 IX86_BUILTIN_CMPEQPS
,
27152 IX86_BUILTIN_CMPLTPS
,
27153 IX86_BUILTIN_CMPLEPS
,
27154 IX86_BUILTIN_CMPGTPS
,
27155 IX86_BUILTIN_CMPGEPS
,
27156 IX86_BUILTIN_CMPNEQPS
,
27157 IX86_BUILTIN_CMPNLTPS
,
27158 IX86_BUILTIN_CMPNLEPS
,
27159 IX86_BUILTIN_CMPNGTPS
,
27160 IX86_BUILTIN_CMPNGEPS
,
27161 IX86_BUILTIN_CMPORDPS
,
27162 IX86_BUILTIN_CMPUNORDPS
,
27163 IX86_BUILTIN_CMPEQSS
,
27164 IX86_BUILTIN_CMPLTSS
,
27165 IX86_BUILTIN_CMPLESS
,
27166 IX86_BUILTIN_CMPNEQSS
,
27167 IX86_BUILTIN_CMPNLTSS
,
27168 IX86_BUILTIN_CMPNLESS
,
27169 IX86_BUILTIN_CMPORDSS
,
27170 IX86_BUILTIN_CMPUNORDSS
,
27172 IX86_BUILTIN_COMIEQSS
,
27173 IX86_BUILTIN_COMILTSS
,
27174 IX86_BUILTIN_COMILESS
,
27175 IX86_BUILTIN_COMIGTSS
,
27176 IX86_BUILTIN_COMIGESS
,
27177 IX86_BUILTIN_COMINEQSS
,
27178 IX86_BUILTIN_UCOMIEQSS
,
27179 IX86_BUILTIN_UCOMILTSS
,
27180 IX86_BUILTIN_UCOMILESS
,
27181 IX86_BUILTIN_UCOMIGTSS
,
27182 IX86_BUILTIN_UCOMIGESS
,
27183 IX86_BUILTIN_UCOMINEQSS
,
27185 IX86_BUILTIN_CVTPI2PS
,
27186 IX86_BUILTIN_CVTPS2PI
,
27187 IX86_BUILTIN_CVTSI2SS
,
27188 IX86_BUILTIN_CVTSI642SS
,
27189 IX86_BUILTIN_CVTSS2SI
,
27190 IX86_BUILTIN_CVTSS2SI64
,
27191 IX86_BUILTIN_CVTTPS2PI
,
27192 IX86_BUILTIN_CVTTSS2SI
,
27193 IX86_BUILTIN_CVTTSS2SI64
,
27195 IX86_BUILTIN_MAXPS
,
27196 IX86_BUILTIN_MAXSS
,
27197 IX86_BUILTIN_MINPS
,
27198 IX86_BUILTIN_MINSS
,
27200 IX86_BUILTIN_LOADUPS
,
27201 IX86_BUILTIN_STOREUPS
,
27202 IX86_BUILTIN_MOVSS
,
27204 IX86_BUILTIN_MOVHLPS
,
27205 IX86_BUILTIN_MOVLHPS
,
27206 IX86_BUILTIN_LOADHPS
,
27207 IX86_BUILTIN_LOADLPS
,
27208 IX86_BUILTIN_STOREHPS
,
27209 IX86_BUILTIN_STORELPS
,
27211 IX86_BUILTIN_MASKMOVQ
,
27212 IX86_BUILTIN_MOVMSKPS
,
27213 IX86_BUILTIN_PMOVMSKB
,
27215 IX86_BUILTIN_MOVNTPS
,
27216 IX86_BUILTIN_MOVNTQ
,
27218 IX86_BUILTIN_LOADDQU
,
27219 IX86_BUILTIN_STOREDQU
,
27221 IX86_BUILTIN_PACKSSWB
,
27222 IX86_BUILTIN_PACKSSDW
,
27223 IX86_BUILTIN_PACKUSWB
,
27225 IX86_BUILTIN_PADDB
,
27226 IX86_BUILTIN_PADDW
,
27227 IX86_BUILTIN_PADDD
,
27228 IX86_BUILTIN_PADDQ
,
27229 IX86_BUILTIN_PADDSB
,
27230 IX86_BUILTIN_PADDSW
,
27231 IX86_BUILTIN_PADDUSB
,
27232 IX86_BUILTIN_PADDUSW
,
27233 IX86_BUILTIN_PSUBB
,
27234 IX86_BUILTIN_PSUBW
,
27235 IX86_BUILTIN_PSUBD
,
27236 IX86_BUILTIN_PSUBQ
,
27237 IX86_BUILTIN_PSUBSB
,
27238 IX86_BUILTIN_PSUBSW
,
27239 IX86_BUILTIN_PSUBUSB
,
27240 IX86_BUILTIN_PSUBUSW
,
27243 IX86_BUILTIN_PANDN
,
27247 IX86_BUILTIN_PAVGB
,
27248 IX86_BUILTIN_PAVGW
,
27250 IX86_BUILTIN_PCMPEQB
,
27251 IX86_BUILTIN_PCMPEQW
,
27252 IX86_BUILTIN_PCMPEQD
,
27253 IX86_BUILTIN_PCMPGTB
,
27254 IX86_BUILTIN_PCMPGTW
,
27255 IX86_BUILTIN_PCMPGTD
,
27257 IX86_BUILTIN_PMADDWD
,
27259 IX86_BUILTIN_PMAXSW
,
27260 IX86_BUILTIN_PMAXUB
,
27261 IX86_BUILTIN_PMINSW
,
27262 IX86_BUILTIN_PMINUB
,
27264 IX86_BUILTIN_PMULHUW
,
27265 IX86_BUILTIN_PMULHW
,
27266 IX86_BUILTIN_PMULLW
,
27268 IX86_BUILTIN_PSADBW
,
27269 IX86_BUILTIN_PSHUFW
,
27271 IX86_BUILTIN_PSLLW
,
27272 IX86_BUILTIN_PSLLD
,
27273 IX86_BUILTIN_PSLLQ
,
27274 IX86_BUILTIN_PSRAW
,
27275 IX86_BUILTIN_PSRAD
,
27276 IX86_BUILTIN_PSRLW
,
27277 IX86_BUILTIN_PSRLD
,
27278 IX86_BUILTIN_PSRLQ
,
27279 IX86_BUILTIN_PSLLWI
,
27280 IX86_BUILTIN_PSLLDI
,
27281 IX86_BUILTIN_PSLLQI
,
27282 IX86_BUILTIN_PSRAWI
,
27283 IX86_BUILTIN_PSRADI
,
27284 IX86_BUILTIN_PSRLWI
,
27285 IX86_BUILTIN_PSRLDI
,
27286 IX86_BUILTIN_PSRLQI
,
27288 IX86_BUILTIN_PUNPCKHBW
,
27289 IX86_BUILTIN_PUNPCKHWD
,
27290 IX86_BUILTIN_PUNPCKHDQ
,
27291 IX86_BUILTIN_PUNPCKLBW
,
27292 IX86_BUILTIN_PUNPCKLWD
,
27293 IX86_BUILTIN_PUNPCKLDQ
,
27295 IX86_BUILTIN_SHUFPS
,
27297 IX86_BUILTIN_RCPPS
,
27298 IX86_BUILTIN_RCPSS
,
27299 IX86_BUILTIN_RSQRTPS
,
27300 IX86_BUILTIN_RSQRTPS_NR
,
27301 IX86_BUILTIN_RSQRTSS
,
27302 IX86_BUILTIN_RSQRTF
,
27303 IX86_BUILTIN_SQRTPS
,
27304 IX86_BUILTIN_SQRTPS_NR
,
27305 IX86_BUILTIN_SQRTSS
,
27307 IX86_BUILTIN_UNPCKHPS
,
27308 IX86_BUILTIN_UNPCKLPS
,
27310 IX86_BUILTIN_ANDPS
,
27311 IX86_BUILTIN_ANDNPS
,
27313 IX86_BUILTIN_XORPS
,
27316 IX86_BUILTIN_LDMXCSR
,
27317 IX86_BUILTIN_STMXCSR
,
27318 IX86_BUILTIN_SFENCE
,
27320 IX86_BUILTIN_FXSAVE
,
27321 IX86_BUILTIN_FXRSTOR
,
27322 IX86_BUILTIN_FXSAVE64
,
27323 IX86_BUILTIN_FXRSTOR64
,
27325 IX86_BUILTIN_XSAVE
,
27326 IX86_BUILTIN_XRSTOR
,
27327 IX86_BUILTIN_XSAVE64
,
27328 IX86_BUILTIN_XRSTOR64
,
27330 IX86_BUILTIN_XSAVEOPT
,
27331 IX86_BUILTIN_XSAVEOPT64
,
27333 IX86_BUILTIN_XSAVEC
,
27334 IX86_BUILTIN_XSAVEC64
,
27336 IX86_BUILTIN_XSAVES
,
27337 IX86_BUILTIN_XRSTORS
,
27338 IX86_BUILTIN_XSAVES64
,
27339 IX86_BUILTIN_XRSTORS64
,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS, IX86_BUILTIN_PAVGUSB, IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC, IX86_BUILTIN_PFADD, IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE, IX86_BUILTIN_PFCMPGT, IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN, IX86_BUILTIN_PFMUL, IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1, IX86_BUILTIN_PFRCPIT2, IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT, IX86_BUILTIN_PFSUB, IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW, IX86_BUILTIN_PFNACC, IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW, IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF,

  IX86_BUILTIN_ADDPD, IX86_BUILTIN_ADDSD, IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD, IX86_BUILTIN_MULPD, IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD, IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD, IX86_BUILTIN_CMPLTPD, IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD, IX86_BUILTIN_CMPGEPD, IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD, IX86_BUILTIN_CMPNLEPD, IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD, IX86_BUILTIN_CMPORDPD, IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD, IX86_BUILTIN_CMPLTSD, IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD, IX86_BUILTIN_CMPNLTSD, IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD, IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD, IX86_BUILTIN_COMILTSD, IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD, IX86_BUILTIN_COMIGESD, IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD, IX86_BUILTIN_UCOMILTSD, IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD, IX86_BUILTIN_UCOMIGESD, IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD, IX86_BUILTIN_MAXSD, IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD, IX86_BUILTIN_ANDNPD,

  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD, IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD, IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD, IX86_BUILTIN_STOREUPD, IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD, IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD, IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ, IX86_BUILTIN_CVTPD2PI, IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ, IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI, IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI, IX86_BUILTIN_MOVNTI64, IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128, IX86_BUILTIN_PADDW128, IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128, IX86_BUILTIN_PADDSB128, IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128, IX86_BUILTIN_PADDUSW128, IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128, IX86_BUILTIN_PSUBD128, IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128, IX86_BUILTIN_PSUBSW128, IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128, IX86_BUILTIN_PANDN128, IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128, IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128, IX86_BUILTIN_PCMPEQW128, IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128, IX86_BUILTIN_PCMPGTW128, IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128, IX86_BUILTIN_PMAXUB128, IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ, IX86_BUILTIN_PMULUDQ128, IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128, IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128, IX86_BUILTIN_PSHUFHW, IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128, IX86_BUILTIN_PSLLWI128, IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128, IX86_BUILTIN_PSRAWI128, IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128, IX86_BUILTIN_PSRLWI128, IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128, IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_FNSTENV, IX86_BUILTIN_FLDENV, IX86_BUILTIN_FNSTSW,
  IX86_BUILTIN_FNCLEX,

  IX86_BUILTIN_BSRSI, IX86_BUILTIN_BSRDI, IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC, IX86_BUILTIN_RDTSCP, IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI, IX86_BUILTIN_RORQI, IX86_BUILTIN_RORHI,

  IX86_BUILTIN_ADDSUBPS, IX86_BUILTIN_HADDPS, IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP, IX86_BUILTIN_MOVSLDUP, IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD, IX86_BUILTIN_HSUBPD, IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_PHADDW, IX86_BUILTIN_PHADDD, IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW, IX86_BUILTIN_PHSUBD, IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW, IX86_BUILTIN_PMULHRSW, IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB, IX86_BUILTIN_PSIGNW, IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR, IX86_BUILTIN_PABSB, IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128, IX86_BUILTIN_PHADDD128, IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128, IX86_BUILTIN_PHSUBD128, IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128, IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128, IX86_BUILTIN_PSIGNB128, IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128, IX86_BUILTIN_PALIGNR128, IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128, IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions. */
  IX86_BUILTIN_MOVNTSD, IX86_BUILTIN_MOVNTSS, IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ, IX86_BUILTIN_INSERTQI, IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_BLENDPD, IX86_BUILTIN_BLENDPS, IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS, IX86_BUILTIN_PBLENDVB128, IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA, IX86_BUILTIN_MPSADBW128, IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ, IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128, IX86_BUILTIN_PMAXSD128, IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128, IX86_BUILTIN_PMINSD128, IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128, IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128, IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128, IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128, IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128, IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128, IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128, IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD, IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD, IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD, IX86_BUILTIN_CEILPD, IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD, IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS, IX86_BUILTIN_CEILPS, IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS, IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX, IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ, IX86_BUILTIN_PTESTC, IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI, IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI, IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI, IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI, IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF, IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX, IX86_BUILTIN_VEC_PACK_SFIX256,

  IX86_BUILTIN_CRC32QI, IX86_BUILTIN_CRC32HI, IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128, IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128, IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128, IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128, IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128, IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128, IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128, IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128, IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128, IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128, IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  IX86_BUILTIN_ADDPD256, IX86_BUILTIN_ADDPS256, IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256, IX86_BUILTIN_ANDPD256, IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256, IX86_BUILTIN_ANDNPS256, IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256, IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_DIVPD256, IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256, IX86_BUILTIN_HADDPD256, IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256, IX86_BUILTIN_HSUBPS256, IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256, IX86_BUILTIN_MINPD256, IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256, IX86_BUILTIN_MULPS256, IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256, IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256, IX86_BUILTIN_SUBPS256, IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256, IX86_BUILTIN_CMPSD, IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD, IX86_BUILTIN_CMPPS, IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256, IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256, IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256, IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256, IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256, IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256, IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL, IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD, IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256, IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD, IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256, IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256, IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256, IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256, IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256, IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256, IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256, IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256, IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256, IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256, IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256, IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256, IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS, IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS, IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256, IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256, IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256, IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256, IX86_BUILTIN_CEILPD256, IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256, IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256, IX86_BUILTIN_CEILPS256, IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256, IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256, IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256, IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI, IX86_BUILTIN_PS256_PS, IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256, IX86_BUILTIN_PS_PS256, IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD, IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS, IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256, IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256, IX86_BUILTIN_MOVMSKPS256,

  IX86_BUILTIN_MPSADBW256, IX86_BUILTIN_PABSB256, IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256, IX86_BUILTIN_PACKSSDW256, IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256, IX86_BUILTIN_PACKUSWB256, IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256, IX86_BUILTIN_PADDD256, IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256, IX86_BUILTIN_PADDSW256, IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256, IX86_BUILTIN_PALIGNR256, IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I, IX86_BUILTIN_PAVGB256, IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256, IX86_BUILTIN_PCMPEQW256, IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256, IX86_BUILTIN_PCMPGTB256, IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256, IX86_BUILTIN_PCMPGTQ256, IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256, IX86_BUILTIN_PHADDSW256, IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256, IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256, IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256, IX86_BUILTIN_PMAXSW256, IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256, IX86_BUILTIN_PMAXUW256, IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256, IX86_BUILTIN_PMINSW256, IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256, IX86_BUILTIN_PMINUW256, IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256, IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256, IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256, IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256, IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256, IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256, IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256, IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256, IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256, IX86_BUILTIN_PMULLW256, IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256, IX86_BUILTIN_POR256, IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256, IX86_BUILTIN_PSHUFD256, IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256, IX86_BUILTIN_PSIGNB256, IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256, IX86_BUILTIN_PSLLDQI256, IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256, IX86_BUILTIN_PSLLDI256, IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256, IX86_BUILTIN_PSLLQ256, IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256, IX86_BUILTIN_PSRADI256, IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256, IX86_BUILTIN_PSRLWI256, IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256, IX86_BUILTIN_PSRLD256, IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256, IX86_BUILTIN_PSUBB256, IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256, IX86_BUILTIN_PSUBQ256, IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256, IX86_BUILTIN_PSUBUSB256, IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256, IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256, IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256, IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256, IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256, IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS, IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256, IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256, IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256, IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256, IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128, IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128, IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256, IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256, IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256, IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256, IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ, IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256, IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ, IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256, IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI, IX86_BUILTIN_PSLLVV8SI, IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI, IX86_BUILTIN_PSRAVV4SI, IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI, IX86_BUILTIN_PSRLVV8SI, IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF, IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF, IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF, IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF, IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI, IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI, IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI, IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI, IX86_BUILTIN_GATHERDIV8SI,

  IX86_BUILTIN_SI512_SI256, IX86_BUILTIN_PD512_PD256,
  IX86_BUILTIN_PS512_PS256, IX86_BUILTIN_SI512_SI, IX86_BUILTIN_PD512_PD,
  IX86_BUILTIN_PS512_PS, IX86_BUILTIN_ADDPD512, IX86_BUILTIN_ADDPS512,
  IX86_BUILTIN_ADDSD_ROUND, IX86_BUILTIN_ADDSS_ROUND,
  IX86_BUILTIN_ALIGND512, IX86_BUILTIN_ALIGNQ512, IX86_BUILTIN_BLENDMD512,
  IX86_BUILTIN_BLENDMPD512, IX86_BUILTIN_BLENDMPS512,
  IX86_BUILTIN_BLENDMQ512, IX86_BUILTIN_BROADCASTF32X4_512,
  IX86_BUILTIN_BROADCASTF64X4_512, IX86_BUILTIN_BROADCASTI32X4_512,
  IX86_BUILTIN_BROADCASTI64X4_512, IX86_BUILTIN_BROADCASTSD512,
  IX86_BUILTIN_BROADCASTSS512, IX86_BUILTIN_CMPD512, IX86_BUILTIN_CMPPD512,
  IX86_BUILTIN_CMPPS512, IX86_BUILTIN_CMPQ512, IX86_BUILTIN_CMPSD_MASK,
  IX86_BUILTIN_CMPSS_MASK, IX86_BUILTIN_COMIDF, IX86_BUILTIN_COMISF,
  IX86_BUILTIN_COMPRESSPD512, IX86_BUILTIN_COMPRESSPDSTORE512,
  IX86_BUILTIN_COMPRESSPS512, IX86_BUILTIN_COMPRESSPSSTORE512,
  IX86_BUILTIN_CVTDQ2PD512, IX86_BUILTIN_CVTDQ2PS512,
  IX86_BUILTIN_CVTPD2DQ512, IX86_BUILTIN_CVTPD2PS512,
  IX86_BUILTIN_CVTPD2UDQ512, IX86_BUILTIN_CVTPH2PS512,
  IX86_BUILTIN_CVTPS2DQ512, IX86_BUILTIN_CVTPS2PD512,
  IX86_BUILTIN_CVTPS2PH512, IX86_BUILTIN_CVTPS2UDQ512,
  IX86_BUILTIN_CVTSD2SS_ROUND, IX86_BUILTIN_CVTSI2SD64,
  IX86_BUILTIN_CVTSI2SS32, IX86_BUILTIN_CVTSI2SS64,
  IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_CVTTPD2DQ512,
  IX86_BUILTIN_CVTTPD2UDQ512, IX86_BUILTIN_CVTTPS2DQ512,
  IX86_BUILTIN_CVTTPS2UDQ512, IX86_BUILTIN_CVTUDQ2PD512,
  IX86_BUILTIN_CVTUDQ2PS512, IX86_BUILTIN_CVTUSI2SD32,
  IX86_BUILTIN_CVTUSI2SD64, IX86_BUILTIN_CVTUSI2SS32,
  IX86_BUILTIN_CVTUSI2SS64, IX86_BUILTIN_DIVPD512, IX86_BUILTIN_DIVPS512,
  IX86_BUILTIN_DIVSD_ROUND, IX86_BUILTIN_DIVSS_ROUND,
  IX86_BUILTIN_EXPANDPD512, IX86_BUILTIN_EXPANDPD512Z,
  IX86_BUILTIN_EXPANDPDLOAD512, IX86_BUILTIN_EXPANDPDLOAD512Z,
  IX86_BUILTIN_EXPANDPS512, IX86_BUILTIN_EXPANDPS512Z,
  IX86_BUILTIN_EXPANDPSLOAD512, IX86_BUILTIN_EXPANDPSLOAD512Z,
  IX86_BUILTIN_EXTRACTF32X4, IX86_BUILTIN_EXTRACTF64X4,
  IX86_BUILTIN_EXTRACTI32X4, IX86_BUILTIN_EXTRACTI64X4,
  IX86_BUILTIN_FIXUPIMMPD512_MASK, IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
  IX86_BUILTIN_FIXUPIMMPS512_MASK, IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
  IX86_BUILTIN_FIXUPIMMSD128_MASK, IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
  IX86_BUILTIN_FIXUPIMMSS128_MASK, IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
  IX86_BUILTIN_GETEXPPD512, IX86_BUILTIN_GETEXPPS512,
  IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_GETEXPSS128,
  IX86_BUILTIN_GETMANTPD512, IX86_BUILTIN_GETMANTPS512,
  IX86_BUILTIN_GETMANTSD128, IX86_BUILTIN_GETMANTSS128,
  IX86_BUILTIN_INSERTF32X4, IX86_BUILTIN_INSERTF64X4,
  IX86_BUILTIN_INSERTI32X4, IX86_BUILTIN_INSERTI64X4,
  IX86_BUILTIN_LOADAPD512, IX86_BUILTIN_LOADAPS512,
  IX86_BUILTIN_LOADDQUDI512, IX86_BUILTIN_LOADDQUSI512,
  IX86_BUILTIN_LOADUPD512, IX86_BUILTIN_LOADUPS512, IX86_BUILTIN_MAXPD512,
  IX86_BUILTIN_MAXPS512, IX86_BUILTIN_MAXSD_ROUND,
  IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINPD512, IX86_BUILTIN_MINPS512,
  IX86_BUILTIN_MINSD_ROUND, IX86_BUILTIN_MINSS_ROUND,
  IX86_BUILTIN_MOVAPD512, IX86_BUILTIN_MOVAPS512, IX86_BUILTIN_MOVDDUP512,
  IX86_BUILTIN_MOVDQA32LOAD512, IX86_BUILTIN_MOVDQA32STORE512,
  IX86_BUILTIN_MOVDQA32_512, IX86_BUILTIN_MOVDQA64LOAD512,
  IX86_BUILTIN_MOVDQA64STORE512, IX86_BUILTIN_MOVDQA64_512,
  IX86_BUILTIN_MOVNTDQ512, IX86_BUILTIN_MOVNTDQA512,
  IX86_BUILTIN_MOVNTPD512, IX86_BUILTIN_MOVNTPS512,
  IX86_BUILTIN_MOVSHDUP512, IX86_BUILTIN_MOVSLDUP512,
  IX86_BUILTIN_MULPD512, IX86_BUILTIN_MULPS512, IX86_BUILTIN_MULSD_ROUND,
  IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_PABSD512, IX86_BUILTIN_PABSQ512,
  IX86_BUILTIN_PADDD512, IX86_BUILTIN_PADDQ512, IX86_BUILTIN_PANDD512,
  IX86_BUILTIN_PANDND512, IX86_BUILTIN_PANDNQ512, IX86_BUILTIN_PANDQ512,
  IX86_BUILTIN_PBROADCASTD512, IX86_BUILTIN_PBROADCASTD512_GPR,
  IX86_BUILTIN_PBROADCASTMB512, IX86_BUILTIN_PBROADCASTMW512,
  IX86_BUILTIN_PBROADCASTQ512, IX86_BUILTIN_PBROADCASTQ512_GPR,
  IX86_BUILTIN_PBROADCASTQ512_MEM, IX86_BUILTIN_PCMPEQD512_MASK,
  IX86_BUILTIN_PCMPEQQ512_MASK, IX86_BUILTIN_PCMPGTD512_MASK,
  IX86_BUILTIN_PCMPGTQ512_MASK, IX86_BUILTIN_PCOMPRESSD512,
  IX86_BUILTIN_PCOMPRESSDSTORE512, IX86_BUILTIN_PCOMPRESSQ512,
  IX86_BUILTIN_PCOMPRESSQSTORE512, IX86_BUILTIN_PEXPANDD512,
  IX86_BUILTIN_PEXPANDD512Z, IX86_BUILTIN_PEXPANDDLOAD512,
  IX86_BUILTIN_PEXPANDDLOAD512Z, IX86_BUILTIN_PEXPANDQ512,
  IX86_BUILTIN_PEXPANDQ512Z, IX86_BUILTIN_PEXPANDQLOAD512,
  IX86_BUILTIN_PEXPANDQLOAD512Z, IX86_BUILTIN_PMAXSD512,
  IX86_BUILTIN_PMAXSQ512, IX86_BUILTIN_PMAXUD512, IX86_BUILTIN_PMAXUQ512,
  IX86_BUILTIN_PMINSD512, IX86_BUILTIN_PMINSQ512, IX86_BUILTIN_PMINUD512,
  IX86_BUILTIN_PMINUQ512, IX86_BUILTIN_PMOVDB512,
  IX86_BUILTIN_PMOVDB512_MEM, IX86_BUILTIN_PMOVDW512,
  IX86_BUILTIN_PMOVDW512_MEM, IX86_BUILTIN_PMOVQB512,
  IX86_BUILTIN_PMOVQB512_MEM, IX86_BUILTIN_PMOVQD512,
  IX86_BUILTIN_PMOVQD512_MEM, IX86_BUILTIN_PMOVQW512,
  IX86_BUILTIN_PMOVQW512_MEM, IX86_BUILTIN_PMOVSDB512,
  IX86_BUILTIN_PMOVSDB512_MEM, IX86_BUILTIN_PMOVSDW512,
  IX86_BUILTIN_PMOVSDW512_MEM, IX86_BUILTIN_PMOVSQB512,
  IX86_BUILTIN_PMOVSQB512_MEM, IX86_BUILTIN_PMOVSQD512,
  IX86_BUILTIN_PMOVSQD512_MEM, IX86_BUILTIN_PMOVSQW512,
  IX86_BUILTIN_PMOVSQW512_MEM, IX86_BUILTIN_PMOVSXBD512,
  IX86_BUILTIN_PMOVSXBQ512, IX86_BUILTIN_PMOVSXDQ512,
  IX86_BUILTIN_PMOVSXWD512, IX86_BUILTIN_PMOVSXWQ512,
  IX86_BUILTIN_PMOVUSDB512, IX86_BUILTIN_PMOVUSDB512_MEM,
  IX86_BUILTIN_PMOVUSDW512, IX86_BUILTIN_PMOVUSDW512_MEM,
  IX86_BUILTIN_PMOVUSQB512, IX86_BUILTIN_PMOVUSQB512_MEM,
  IX86_BUILTIN_PMOVUSQD512, IX86_BUILTIN_PMOVUSQD512_MEM,
  IX86_BUILTIN_PMOVUSQW512, IX86_BUILTIN_PMOVUSQW512_MEM,
  IX86_BUILTIN_PMOVZXBD512, IX86_BUILTIN_PMOVZXBQ512,
  IX86_BUILTIN_PMOVZXDQ512, IX86_BUILTIN_PMOVZXWD512,
  IX86_BUILTIN_PMOVZXWQ512, IX86_BUILTIN_PMULDQ512,
  IX86_BUILTIN_PMULLD512, IX86_BUILTIN_PMULUDQ512, IX86_BUILTIN_PORD512,
  IX86_BUILTIN_PORQ512, IX86_BUILTIN_PROLD512, IX86_BUILTIN_PROLQ512,
  IX86_BUILTIN_PROLVD512, IX86_BUILTIN_PROLVQ512, IX86_BUILTIN_PRORD512,
  IX86_BUILTIN_PRORQ512, IX86_BUILTIN_PRORVD512, IX86_BUILTIN_PRORVQ512,
  IX86_BUILTIN_PSHUFD512, IX86_BUILTIN_PSLLD512, IX86_BUILTIN_PSLLDI512,
  IX86_BUILTIN_PSLLQ512, IX86_BUILTIN_PSLLQI512, IX86_BUILTIN_PSLLVV16SI,
  IX86_BUILTIN_PSLLVV8DI, IX86_BUILTIN_PSRAD512, IX86_BUILTIN_PSRADI512,
  IX86_BUILTIN_PSRAQ512, IX86_BUILTIN_PSRAQI512, IX86_BUILTIN_PSRAVV16SI,
  IX86_BUILTIN_PSRAVV8DI, IX86_BUILTIN_PSRLD512, IX86_BUILTIN_PSRLDI512,
  IX86_BUILTIN_PSRLQ512, IX86_BUILTIN_PSRLQI512, IX86_BUILTIN_PSRLVV16SI,
  IX86_BUILTIN_PSRLVV8DI, IX86_BUILTIN_PSUBD512, IX86_BUILTIN_PSUBQ512,
  IX86_BUILTIN_PTESTMD512, IX86_BUILTIN_PTESTMQ512,
  IX86_BUILTIN_PTESTNMD512, IX86_BUILTIN_PTESTNMQ512,
  IX86_BUILTIN_PUNPCKHDQ512, IX86_BUILTIN_PUNPCKHQDQ512,
  IX86_BUILTIN_PUNPCKLDQ512, IX86_BUILTIN_PUNPCKLQDQ512,
  IX86_BUILTIN_PXORD512, IX86_BUILTIN_PXORQ512, IX86_BUILTIN_RCP14PD512,
  IX86_BUILTIN_RCP14PS512, IX86_BUILTIN_RCP14SD, IX86_BUILTIN_RCP14SS,
  IX86_BUILTIN_RNDSCALEPD, IX86_BUILTIN_RNDSCALEPS,
  IX86_BUILTIN_RNDSCALESD, IX86_BUILTIN_RNDSCALESS,
  IX86_BUILTIN_RSQRT14PD512, IX86_BUILTIN_RSQRT14PS512,
  IX86_BUILTIN_RSQRT14SD, IX86_BUILTIN_RSQRT14SS,
  IX86_BUILTIN_SCALEFPD512, IX86_BUILTIN_SCALEFPS512,
  IX86_BUILTIN_SCALEFSD, IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SHUFPD512,
  IX86_BUILTIN_SHUFPS512, IX86_BUILTIN_SHUF_F32x4, IX86_BUILTIN_SHUF_F64x2,
  IX86_BUILTIN_SHUF_I32x4, IX86_BUILTIN_SHUF_I64x2, IX86_BUILTIN_SQRTPD512,
  IX86_BUILTIN_SQRTPD512_MASK, IX86_BUILTIN_SQRTPS512_MASK,
  IX86_BUILTIN_SQRTPS_NR512, IX86_BUILTIN_SQRTSD_ROUND,
  IX86_BUILTIN_SQRTSS_ROUND, IX86_BUILTIN_STOREAPD512,
  IX86_BUILTIN_STOREAPS512, IX86_BUILTIN_STOREDQUDI512,
  IX86_BUILTIN_STOREDQUSI512, IX86_BUILTIN_STOREUPD512,
  IX86_BUILTIN_STOREUPS512, IX86_BUILTIN_SUBPD512, IX86_BUILTIN_SUBPS512,
  IX86_BUILTIN_SUBSD_ROUND, IX86_BUILTIN_SUBSS_ROUND,
  IX86_BUILTIN_UCMPD512, IX86_BUILTIN_UCMPQ512, IX86_BUILTIN_UNPCKHPD512,
  IX86_BUILTIN_UNPCKHPS512, IX86_BUILTIN_UNPCKLPD512,
  IX86_BUILTIN_UNPCKLPS512, IX86_BUILTIN_VCVTSD2SI32,
  IX86_BUILTIN_VCVTSD2SI64, IX86_BUILTIN_VCVTSD2USI32,
  IX86_BUILTIN_VCVTSD2USI64, IX86_BUILTIN_VCVTSS2SI32,
  IX86_BUILTIN_VCVTSS2SI64, IX86_BUILTIN_VCVTSS2USI32,
  IX86_BUILTIN_VCVTSS2USI64, IX86_BUILTIN_VCVTTSD2SI32,
  IX86_BUILTIN_VCVTTSD2SI64, IX86_BUILTIN_VCVTTSD2USI32,
  IX86_BUILTIN_VCVTTSD2USI64, IX86_BUILTIN_VCVTTSS2SI32,
  IX86_BUILTIN_VCVTTSS2SI64, IX86_BUILTIN_VCVTTSS2USI32,
  IX86_BUILTIN_VCVTTSS2USI64, IX86_BUILTIN_VFMADDPD512_MASK,
  IX86_BUILTIN_VFMADDPD512_MASK3, IX86_BUILTIN_VFMADDPD512_MASKZ,
  IX86_BUILTIN_VFMADDPS512_MASK, IX86_BUILTIN_VFMADDPS512_MASK3,
  IX86_BUILTIN_VFMADDPS512_MASKZ, IX86_BUILTIN_VFMADDSD3_ROUND,
  IX86_BUILTIN_VFMADDSS3_ROUND, IX86_BUILTIN_VFMADDSUBPD512_MASK,
  IX86_BUILTIN_VFMADDSUBPD512_MASK3, IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
  IX86_BUILTIN_VFMADDSUBPS512_MASK, IX86_BUILTIN_VFMADDSUBPS512_MASK3,
  IX86_BUILTIN_VFMADDSUBPS512_MASKZ, IX86_BUILTIN_VFMSUBADDPD512_MASK3,
  IX86_BUILTIN_VFMSUBADDPS512_MASK3, IX86_BUILTIN_VFMSUBPD512_MASK3,
  IX86_BUILTIN_VFMSUBPS512_MASK3, IX86_BUILTIN_VFMSUBSD3_MASK3,
  IX86_BUILTIN_VFMSUBSS3_MASK3, IX86_BUILTIN_VFNMADDPD512_MASK,
  IX86_BUILTIN_VFNMADDPS512_MASK, IX86_BUILTIN_VFNMSUBPD512_MASK,
  IX86_BUILTIN_VFNMSUBPD512_MASK3, IX86_BUILTIN_VFNMSUBPS512_MASK,
  IX86_BUILTIN_VFNMSUBPS512_MASK3, IX86_BUILTIN_VPCLZCNTD512,
  IX86_BUILTIN_VPCLZCNTQ512, IX86_BUILTIN_VPCONFLICTD512,
  IX86_BUILTIN_VPCONFLICTQ512, IX86_BUILTIN_VPERMDF512,
  IX86_BUILTIN_VPERMDI512, IX86_BUILTIN_VPERMI2VARD512,
  IX86_BUILTIN_VPERMI2VARPD512, IX86_BUILTIN_VPERMI2VARPS512,
  IX86_BUILTIN_VPERMI2VARQ512, IX86_BUILTIN_VPERMILPD512,
  IX86_BUILTIN_VPERMILPS512, IX86_BUILTIN_VPERMILVARPD512,
  IX86_BUILTIN_VPERMILVARPS512, IX86_BUILTIN_VPERMT2VARD512,
  IX86_BUILTIN_VPERMT2VARD512_MASKZ, IX86_BUILTIN_VPERMT2VARPD512,
  IX86_BUILTIN_VPERMT2VARPD512_MASKZ, IX86_BUILTIN_VPERMT2VARPS512,
  IX86_BUILTIN_VPERMT2VARPS512_MASKZ, IX86_BUILTIN_VPERMT2VARQ512,
  IX86_BUILTIN_VPERMT2VARQ512_MASKZ, IX86_BUILTIN_VPERMVARDF512,
  IX86_BUILTIN_VPERMVARDI512, IX86_BUILTIN_VPERMVARSF512,
  IX86_BUILTIN_VPERMVARSI512, IX86_BUILTIN_VTERNLOGD512_MASK,
  IX86_BUILTIN_VTERNLOGD512_MASKZ, IX86_BUILTIN_VTERNLOGQ512_MASK,
  IX86_BUILTIN_VTERNLOGQ512_MASKZ,

  /* Mask arithmetic operations */
  IX86_BUILTIN_KAND16, IX86_BUILTIN_KANDN16, IX86_BUILTIN_KNOT16,
  IX86_BUILTIN_KOR16, IX86_BUILTIN_KORTESTC16, IX86_BUILTIN_KORTESTZ16,
  IX86_BUILTIN_KUNPCKBW, IX86_BUILTIN_KXNOR16, IX86_BUILTIN_KXOR16,
  IX86_BUILTIN_KMOV16,

  /* Alternate 4 and 8 element gather/scatter for the vectorizer
     where all operands are 32-byte or 64-byte wide respectively. */
  IX86_BUILTIN_GATHERALTSIV4DF, IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI, IX86_BUILTIN_GATHERALTDIV8SI,
  IX86_BUILTIN_GATHER3ALTDIV16SF, IX86_BUILTIN_GATHER3ALTDIV16SI,
  IX86_BUILTIN_GATHER3ALTSIV8DF, IX86_BUILTIN_GATHER3ALTSIV8DI,
  IX86_BUILTIN_GATHER3DIV16SF, IX86_BUILTIN_GATHER3DIV16SI,
  IX86_BUILTIN_GATHER3DIV8DF, IX86_BUILTIN_GATHER3DIV8DI,
  IX86_BUILTIN_GATHER3SIV16SF, IX86_BUILTIN_GATHER3SIV16SI,
  IX86_BUILTIN_GATHER3SIV8DF, IX86_BUILTIN_GATHER3SIV8DI,
  IX86_BUILTIN_SCATTERDIV16SF, IX86_BUILTIN_SCATTERDIV16SI,
  IX86_BUILTIN_SCATTERDIV8DF, IX86_BUILTIN_SCATTERDIV8DI,
  IX86_BUILTIN_SCATTERSIV16SF, IX86_BUILTIN_SCATTERSIV16SI,
  IX86_BUILTIN_SCATTERSIV8DF, IX86_BUILTIN_SCATTERSIV8DI,

  IX86_BUILTIN_GATHERPFQPD, IX86_BUILTIN_GATHERPFDPS,
  IX86_BUILTIN_GATHERPFDPD, IX86_BUILTIN_GATHERPFQPS,
  IX86_BUILTIN_SCATTERPFDPD, IX86_BUILTIN_SCATTERPFDPS,
  IX86_BUILTIN_SCATTERPFQPD, IX86_BUILTIN_SCATTERPFQPS,

  IX86_BUILTIN_EXP2PD_MASK, IX86_BUILTIN_EXP2PS_MASK, IX86_BUILTIN_EXP2PS,
  IX86_BUILTIN_RCP28PD, IX86_BUILTIN_RCP28PS, IX86_BUILTIN_RCP28SD,
  IX86_BUILTIN_RCP28SS, IX86_BUILTIN_RSQRT28PD, IX86_BUILTIN_RSQRT28PS,
  IX86_BUILTIN_RSQRT28SD, IX86_BUILTIN_RSQRT28SS,

  /* SHA builtins. */
  IX86_BUILTIN_SHA1MSG1, IX86_BUILTIN_SHA1MSG2, IX86_BUILTIN_SHA1NEXTE,
  IX86_BUILTIN_SHA1RNDS4, IX86_BUILTIN_SHA256MSG1, IX86_BUILTIN_SHA256MSG2,
  IX86_BUILTIN_SHA256RNDS2,

  /* CLFLUSHOPT instructions. */
  IX86_BUILTIN_CLFLUSHOPT,

  /* TFmode support builtins. */
  IX86_BUILTIN_HUGE_VALQ, IX86_BUILTIN_FABSQ, IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins. */
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD, IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD256,
  IX86_BUILTIN_CPYSGNPD512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,

  /* FMA4 instructions. */
  IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD, IX86_BUILTIN_VFMADDPS256, IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS, IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256, IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions. */
  IX86_BUILTIN_VFMADDSS3, IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions. */
  IX86_BUILTIN_VPCMOV, IX86_BUILTIN_VPCMOV_V2DI, IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI, IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF, IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256, IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256, IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256, IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW, IX86_BUILTIN_VPMACSWW, IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD, IX86_BUILTIN_VPMACSSDD, IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL, IX86_BUILTIN_VPMACSSDQH, IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH, IX86_BUILTIN_VPMADCSSWD, IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW, IX86_BUILTIN_VPHADDBD, IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD, IX86_BUILTIN_VPHADDWQ, IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW, IX86_BUILTIN_VPHADDUBD, IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD, IX86_BUILTIN_VPHADDUWQ, IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW, IX86_BUILTIN_VPHSUBWD, IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB, IX86_BUILTIN_VPROTW, IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ, IX86_BUILTIN_VPROTB_IMM, IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM, IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB, IX86_BUILTIN_VPSHLW, IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ, IX86_BUILTIN_VPSHAB, IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD, IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS, IX86_BUILTIN_VFRCZSD, IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD, IX86_BUILTIN_VFRCZPS256, IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB, IX86_BUILTIN_VPCOMNEUB, IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB, IX86_BUILTIN_VPCOMGTUB, IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB, IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW, IX86_BUILTIN_VPCOMNEUW, IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW, IX86_BUILTIN_VPCOMGTUW, IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW, IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD, IX86_BUILTIN_VPCOMNEUD, IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD, IX86_BUILTIN_VPCOMGTUD, IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD, IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ, IX86_BUILTIN_VPCOMNEUQ, IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ, IX86_BUILTIN_VPCOMGTUQ, IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ, IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB, IX86_BUILTIN_VPCOMNEB, IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB, IX86_BUILTIN_VPCOMGTB, IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB, IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW, IX86_BUILTIN_VPCOMNEW, IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW, IX86_BUILTIN_VPCOMGTW, IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW, IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD, IX86_BUILTIN_VPCOMNED, IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED, IX86_BUILTIN_VPCOMGTD, IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED, IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ, IX86_BUILTIN_VPCOMNEQ, IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ, IX86_BUILTIN_VPCOMGTQ, IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ, IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions. */
  IX86_BUILTIN_LLWPCB, IX86_BUILTIN_SLWPCB, IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64, IX86_BUILTIN_LWPINS32, IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_XBEGIN, IX86_BUILTIN_XABORT, IX86_BUILTIN_XTEST,

  /* BMI instructions. */
  IX86_BUILTIN_BEXTR32, IX86_BUILTIN_BEXTR64,

  /* TBM instructions. */
  IX86_BUILTIN_BEXTRI32, IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions. */
  IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, IX86_BUILTIN_PEXT64,

  /* ADX instructions. */
  IX86_BUILTIN_ADDCARRYX32, IX86_BUILTIN_ADDCARRYX64,

  /* SBB instructions. */
  IX86_BUILTIN_SBB32, IX86_BUILTIN_SBB64,

  /* FSGSBASE instructions. */
  IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32, IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32, IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32, IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions. */
  IX86_BUILTIN_RDRAND16_STEP, IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* RDSEED instructions. */
  IX86_BUILTIN_RDSEED16_STEP, IX86_BUILTIN_RDSEED32_STEP,
  IX86_BUILTIN_RDSEED64_STEP,

  /* F16C instructions. */
  IX86_BUILTIN_CVTPH2PS, IX86_BUILTIN_CVTPH2PS256, IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features. */
  IX86_BUILTIN_CPU_INIT, IX86_BUILTIN_CPU_IS, IX86_BUILTIN_CPU_SUPPORTS,

  /* Read/write FLAGS register built-ins. */
  IX86_BUILTIN_READ_FLAGS, IX86_BUILTIN_WRITE_FLAGS,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa
{
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
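
/* Editorial sketch, not part of the original file: a hypothetical
   registration call to make the two paths above concrete.  The enum
   value and function-type name below are placeholders, not real
   entries.  When the requested ISA is already enabled (or the front
   end uses the ext_scope hook), the decl is built immediately;
   otherwise the name/type/ISA triple is parked in ix86_builtins_isa
   until ix86_add_new_builtins runs:

     def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_example",
		  INT_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);  */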

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
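
/* Editorial note, not part of the original file: TREE_READONLY marks
   the builtin "const" in the attribute sense, so the middle end may
   CSE repeated calls with identical operands.  A user-level sketch
   (v4sf is the builtin's vector type):

     typedef float v4sf __attribute__ ((vector_size (16)));
     v4sf twice (v4sf x)
     {
       v4sf a = __builtin_ia32_sqrtps (x);
       v4sf b = __builtin_ia32_sqrtps (x);
       return a + b;
     }

   With the decl marked const, both calls can fold into one sqrtps.
   When the decl is deferred, only const_p is recorded here and the
   flag is applied later in ix86_add_new_builtins.  */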

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
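
/* Editorial sketch, not part of the original file: user code that
   exercises the deferred path.  Compiled without -mavx2, the AVX2
   builtins are initially only recorded in ix86_builtins_isa; when the
   target attribute below switches isa_flags, ix86_add_new_builtins
   declares them at extended scope so the reference resolves:

     typedef int v8si __attribute__ ((vector_size (32)));
     __attribute__ ((target ("avx2")))
     v8si add8 (v8si a, v8si b)
     {
       return __builtin_ia32_paddd256 (a, b);
     }  */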

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
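
/* Editorial example, not part of the original file: descriptions for
   compares with no native encoding set BUILTIN_DESC_SWAP_OPERANDS and
   name the reversed predicate, so a "greater than" scalar float
   compare is emitted as the swapped "less than" form, roughly:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3,
       "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, UNLT,
       BUILTIN_DESC_SWAP_OPERANDS },  */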

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
28989 /* Special builtins with variable number of arguments. */
28990 static const struct builtin_description bdesc_special_args
[] =
28992 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28993 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
28994 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28996 /* 80387 (for use internally for atomic compound assignment). */
28997 { 0, CODE_FOR_fnstenv
, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28998 { 0, CODE_FOR_fldenv
, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV
, UNKNOWN
, (int) VOID_FTYPE_PCVOID
},
28999 { 0, CODE_FOR_fnstsw
, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW
, UNKNOWN
, (int) USHORT_FTYPE_VOID
},
29000 { 0, CODE_FOR_fnclex
, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
29003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
29006 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
29008 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29009 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
29010 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
29011 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29012 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29013 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29014 { OPTION_MASK_ISA_XSAVES
, CODE_FOR_nothing
, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29015 { OPTION_MASK_ISA_XSAVES
, CODE_FOR_nothing
, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29016 { OPTION_MASK_ISA_XSAVEC
, CODE_FOR_nothing
, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29018 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
29019 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
29020 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29021 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29022 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29023 { OPTION_MASK_ISA_XSAVES
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29024 { OPTION_MASK_ISA_XSAVES
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
29025 { OPTION_MASK_ISA_XSAVEC
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
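  /* In the AVX-512 *_mask entries above, the trailing HI/QI argument is
     presumably the write mask: HImode carries one bit per element for
     16-element vectors, QImode for 8-element ones.  */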
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
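  /* A minimal sketch (an assumption about usage, not a quote of this file)
     of how a table like bdesc_special_args is typically walked during
     builtin initialization; rows with a null name are skipped and defined
     by hand elsewhere:

       const struct builtin_description *d;
       size_t i;

       for (i = 0, d = bdesc_special_args;
            i < ARRAY_SIZE (bdesc_special_args);
            i++, d++)
         {
           if (d->name == 0)
             continue;
           def_builtin (d->mask, d->name,
                        (enum ix86_builtin_func_type) d->flag, d->code);
         }  */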
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
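  /* Unlike the special-args table above, these rows take only value
     operands, so they can presumably all be expanded through the generic
     ix86_expand_args_builtin path rather than the special-args one that
     copes with pointer and void operands.  */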
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
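  /* In the shift entries below, the *_COUNT suffix on the ftype presumably
     marks the last operand as a shift count that may arrive either as an
     immediate or as a register/MMX value.  */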
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
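  /* In the compare entries below, the penultimate field (EQ, LT, UNGE,
     ORDERED, ...) is the rtx comparison code fed to the single maskcmp
     pattern; the *_SWAP ftypes presumably tell the expander to swap the
     two inputs, which is how cmpgt/cmpge reuse the LT/LE patterns.  */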
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
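  /* The *_VEC_MERGE ftype above presumably marks one-argument scalar ops
     (sqrtss and friends) whose insn pattern still takes two vector inputs;
     the expander would then reuse the single argument for both.  */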
  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
29520 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
29521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
29522 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
29523 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
29524 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
29526 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
29527 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
29528 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
29529 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
29530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
29531 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
29532 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
29534 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
29535 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
29536 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
29537 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
29539 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
29540 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
29541 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
29543 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
29545 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
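
  /* SSE2 MMX */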
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
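
  /* SSE3 */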
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
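
  /* SSSE3 */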
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
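
  /* SSSE3.  */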
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
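
  /* SSE4.1 */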
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
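
  /* SSE4.1 */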
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
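
  /* SSE4.2 */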
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
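
  /* SSE4A */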
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
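
  /* AES */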
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
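
  /* PCLMUL */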
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
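
  /* AVX */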
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
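
  /* AVX2 */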
29837 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
29838 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
29839 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
29840 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
29841 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
29842 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
29843 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
29844 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
29845 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29846 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29847 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29848 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29849 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29850 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29851 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29852 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29853 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
29854 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29855 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29856 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29857 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29858 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
29859 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
29860 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29861 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29862 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29863 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29864 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29865 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29866 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29867 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29868 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29869 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29870 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29871 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29872 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29873 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29874 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
29875 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
29876 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29877 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29878 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29879 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29880 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29881 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29882 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29883 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29884 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29885 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29886 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29887 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29888 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
29889 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
29890 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
29891 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
29892 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
29893 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
29894 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
29895 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
29896 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
29897 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
29898 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
29899 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
29900 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
29901 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
29902 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29903 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29904 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29905 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29906 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29907 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
29908 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
29910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
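  /* Editorial note on the FTYPE suffixes used above (a sketch inferred
     from the operand handling in ix86_expand_args_builtin later in this
     file, not part of the original table): the _COUNT suffix marks shift
     builtins whose last operand is a shift count that may be either an
     immediate or a narrower vector register, and _CONVERT marks cases
     such as __builtin_ia32_pslldqi256, where the builtin's vector type
     (V4DI) differs from the mode of the insn pattern (V2TI here), so the
     operands are converted with gen_lowpart before expansion.  */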
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
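  /* Editorial note on the AVX-512 entries that follow: for builtins whose
     name carries a _mask suffix, the FTYPE lists the source operands
     first, then the vector supplying the masked-out elements, then a QI
     or HI mask (one bit per element); the _maskz variants use the same
     shape but zero the masked-out elements instead.  This is a reading
     aid inferred from the patterns above, not new semantics.  */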
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
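  /* Editorial note: the two entries just above reuse the comparison
     field (normally UNKNOWN) to carry the rounding-mode constant
     ROUND_FLOOR or ROUND_CEIL through to the expander, which emits it
     as the immediate rounding operand; this is how the single insn
     pattern avx512f_roundpd_vec_pack_sfix512 serves both builtins.  */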
  /* Mask arithmetic operations */
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
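  /* Editorial note: the kandhi/korhi/kxorhi family above backs the
     __mmask16 intrinsics (e.g. _mm512_kand) in avx512fintrin.h; the
     masks are plain HImode values, which is why generic patterns such
     as andhi3 and xorhi3 can implement most of them directly.  */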
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
};
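/* Editorial sketch of how the bdesc_args table above is consumed (the
   shape of the registration loop in ix86_init_mmx_sse_builtins; shown
   here as a reading aid, not a verbatim copy):

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)	/* e.g. the SHA entries above.  */
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin_const (d->mask, d->name, ftype, d->code);
       }

   so an entry only becomes a usable builtin when its OPTION_MASK_ISA_*
   bits are enabled for the current target.  */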
/* Builtins with rounding support.  */
static const struct builtin_description bdesc_round_args[] =
{
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30307 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_rndscalev16sf_mask_round
, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT
},
30308 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_rndscalev2df_round
, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT
},
30309 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_rndscalev4sf_round
, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT
},
30310 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_scalefv8df_mask_round
, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30311 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_scalefv16sf_mask_round
, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30312 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vmscalefv2df_round
, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
30313 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vmscalefv4sf_round
, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
30314 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_sqrtv8df2_mask_round
, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT
},
30315 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_sqrtv16sf2_mask_round
, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT
},
30316 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse2_vmsqrtv2df2_round
, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
30317 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse_vmsqrtv4sf2_round
, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
30318 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_subv8df3_mask_round
, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30319 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_subv16sf3_mask_round
, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30320 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse2_vmsubv2df3_round
, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
30321 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse_vmsubv4sf3_round
, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
30322 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse2_cvtsd2si_round
, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32
, UNKNOWN
, (int) INT_FTYPE_V2DF_INT
},
30323 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq_round
, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF_INT
},
30324 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vcvtsd2usi_round
, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32
, UNKNOWN
, (int) UINT_FTYPE_V2DF_INT
},
30325 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_avx512f_vcvtsd2usiq_round
, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64
, UNKNOWN
, (int) UINT64_FTYPE_V2DF_INT
},
30326 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse_cvtss2si_round
, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32
, UNKNOWN
, (int) INT_FTYPE_V4SF_INT
},
30327 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq_round
, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF_INT
},
30328 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vcvtss2usi_round
, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32
, UNKNOWN
, (int) UINT_FTYPE_V4SF_INT
},
30329 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_avx512f_vcvtss2usiq_round
, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64
, UNKNOWN
, (int) UINT64_FTYPE_V4SF_INT
},
30330 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse2_cvttsd2si_round
, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32
, UNKNOWN
, (int) INT_FTYPE_V2DF_INT
},
30331 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq_round
, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF_INT
},
30332 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vcvttsd2usi_round
, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32
, UNKNOWN
, (int) UINT_FTYPE_V2DF_INT
},
30333 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_avx512f_vcvttsd2usiq_round
, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64
, UNKNOWN
, (int) UINT64_FTYPE_V2DF_INT
},
30334 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_sse_cvttss2si_round
, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32
, UNKNOWN
, (int) INT_FTYPE_V4SF_INT
},
30335 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq_round
, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF_INT
},
30336 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_vcvttss2usi_round
, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32
, UNKNOWN
, (int) UINT_FTYPE_V4SF_INT
},
30337 { OPTION_MASK_ISA_AVX512F
| OPTION_MASK_ISA_64BIT
, CODE_FOR_avx512f_vcvttss2usiq_round
, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64
, UNKNOWN
, (int) UINT64_FTYPE_V4SF_INT
},
30338 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v8df_mask_round
, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30339 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v8df_mask3_round
, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30340 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v8df_maskz_round
, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30341 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v16sf_mask_round
, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30342 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v16sf_mask3_round
, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30343 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmadd_v16sf_maskz_round
, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30344 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_fmai_vmfmadd_v2df_round
, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT
},
30345 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_fmai_vmfmadd_v4sf_round
, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT
},
30346 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v8df_mask_round
, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30347 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round
, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30348 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round
, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30349 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round
, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30350 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round
, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30351 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round
, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30352 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round
, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30353 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round
, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30354 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmsub_v8df_mask3_round
, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30355 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fmsub_v16sf_mask3_round
, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30356 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmadd_v8df_mask_round
, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30357 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmadd_v16sf_mask_round
, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30358 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmsub_v8df_mask_round
, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30359 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmsub_v8df_mask3_round
, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT
},
30360 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmsub_v16sf_mask_round
, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30361 { OPTION_MASK_ISA_AVX512F
, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round
, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT
},
30364 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_exp2v8df_mask_round
, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT
},
30365 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_exp2v16sf_mask_round
, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT
},
30366 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_rcp28v8df_mask_round
, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT
},
30367 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_rcp28v16sf_mask_round
, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT
},
30368 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_vmrcp28v2df_round
, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
30369 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_vmrcp28v4sf_round
, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
30370 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_rsqrt28v8df_mask_round
, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD
, UNKNOWN
, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT
},
30371 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_rsqrt28v16sf_mask_round
, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS
, UNKNOWN
, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT
},
30372 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_vmrsqrt28v2df_round
, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
30373 { OPTION_MASK_ISA_AVX512ER
, CODE_FOR_avx512er_vmrsqrt28v4sf_round
, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I   V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1  V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I   V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1  V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF         V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF         V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2        V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2        V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI         V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI         V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI      V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI         V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI      V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI         V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2        V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2        V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2        V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2        V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF         V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF         V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI         V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI         V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI         V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI         V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM     V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM     V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM     V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM     V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP     V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP     V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP     V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP     V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF      V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF      V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF      V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF      V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF      V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF      V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF         V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF         V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2        V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2        V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI         V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI         V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI         V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI         V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI      V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI      V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI      V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI      V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI      V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI      V8HI_FTYPE_V16QI
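
/* Editorial note on the naming scheme above: MULTI_ARG_<N>_<mode>[2]
   names a builtin taking <N> vector operands of the given 128-bit element
   mode, with a trailing "2" for the 256-bit variants; a second mode
   (e.g. _HI_SI, _SI_DI) names the odd operand or widened result, and the
   _IMM, _CMP and _TF suffixes mark the immediate, comparison and
   true/false-comparison forms.  */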
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
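
  /* Editorial note: PCOM_FALSE and PCOM_TRUE below are not real rtx
     comparison codes; they are sentinel values cast to enum rtx_code for
     the always-false and always-true vpcom forms, which presumably get
     special handling when these builtins are expanded.  */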
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
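
/* Editorial note: every bdesc_tm entry uses CODE_FOR_nothing, i.e. none of
   these builtins is expanded from an insn pattern; ix86_init_tm_builtins
   below registers them with add_builtin_function and maps them onto the
   generic BUILT_IN_TM_* codes instead.  */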
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_to_uhwi (TYPE_SIZE (type)))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_to_uhwi (TYPE_SIZE (type)))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
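
/* Editorial note: for a 128-bit vector type such as V4SF, the two callbacks
   above resolve to the BUILT_IN_TM_LOAD_M128/BUILT_IN_TM_STORE_M128 decls
   registered from bdesc_tm; any vector size other than 64, 128 or 256 bits
   falls through and yields NULL_TREE.  */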
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }
          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);
          /* add_builtin_function () will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with rounding.  */
  for (i = 0, d = bdesc_round_args;
       i < ARRAY_SIZE (bdesc_round_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A  */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
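
  /* Editorial note: the AVX2 gather builtins below share one operand
     pattern, visible in their FTYPEs: a pass-through source vector, a base
     pointer, an index vector ("si" = 32-bit, "di" = 64-bit indices), a
     merge mask shaped like the result, and an immediate scale.  */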
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
               V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
               IX86_BUILTIN_GATHER3SIV16SF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
               V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
               IX86_BUILTIN_GATHER3SIV8DF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
               IX86_BUILTIN_GATHER3DIV16SF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
               V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
               IX86_BUILTIN_GATHER3DIV8DF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
               V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
               IX86_BUILTIN_GATHER3SIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
               V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
               IX86_BUILTIN_GATHER3SIV8DI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
               V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
               IX86_BUILTIN_GATHER3DIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
               V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
               IX86_BUILTIN_GATHER3DIV8DI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
               V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
               IX86_BUILTIN_GATHER3ALTSIV8DF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
               V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
               IX86_BUILTIN_GATHER3ALTDIV16SF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
               V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
               IX86_BUILTIN_GATHER3ALTSIV8DI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
               V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
               IX86_BUILTIN_GATHER3ALTDIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
               VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
               IX86_BUILTIN_SCATTERSIV16SF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
               VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
               IX86_BUILTIN_SCATTERSIV8DF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
               VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
               IX86_BUILTIN_SCATTERDIV16SF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
               VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
               IX86_BUILTIN_SCATTERDIV8DF);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
               VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
               IX86_BUILTIN_SCATTERSIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
               VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
               IX86_BUILTIN_SCATTERSIV8DI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
               VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
               IX86_BUILTIN_SCATTERDIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
               VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
               IX86_BUILTIN_SCATTERDIV8DI);

  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
               VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
               IX86_BUILTIN_GATHERPFDPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
               VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
               IX86_BUILTIN_GATHERPFDPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
               VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
               IX86_BUILTIN_GATHERPFQPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
               VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
               IX86_BUILTIN_GATHERPFQPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
               VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
               IX86_BUILTIN_SCATTERPFDPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
               VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
               IX86_BUILTIN_SCATTERPFDPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
               VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
               IX86_BUILTIN_SCATTERPFQPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
               VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
               IX86_BUILTIN_SCATTERPFQPS);

  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
                     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
                     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
  def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1nexte",
31113 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA1NEXTE
);
31114 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1rnds4",
31115 V4SI_FTYPE_V4SI_V4SI_INT
, IX86_BUILTIN_SHA1RNDS4
);
31116 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256msg1",
31117 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA256MSG1
);
31118 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256msg2",
31119 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA256MSG2
);
31120 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256rnds2",
31121 V4SI_FTYPE_V4SI_V4SI_V4SI
, IX86_BUILTIN_SHA256RNDS2
);
31124 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
31125 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);

  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);

  def_builtin (0, "__builtin_ia32_sbb_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_sbb_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_SBB64);

  /* Read/write FLAGS.  */
  def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
	       UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
	       VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);

  def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);

  /* Add FMA4 multi-arg argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
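/* Illustrative note, not from the original file: these builtins are the
   back ends of the user-visible intrinsics, e.g. the AVX2 gather intrinsic
   _mm256_i32gather_ps is expected to expand to __builtin_ia32_gathersiv8sf
   defined above.  A minimal sketch of direct use, assuming -mavx2 and the
   V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT signature (src, base, index, mask,
   scale):

     typedef float v8sf __attribute__ ((vector_size (32)));
     typedef int   v8si __attribute__ ((vector_size (32)));

     v8sf
     gather (v8sf src, const float *base, v8si idx, v8sf mask)
     {
       return __builtin_ia32_gathersiv8sf (src, base, idx, mask, 4);
     }  */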
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR,
						     integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
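/* Sketch of the control flow built above (illustrative only).  For a
   predicate chain P1, P2 the statements added to NEW_BB correspond to

       cond1 = P1 (arg1);
       cond2 = P2 (arg2);
       and_tmp = MIN (cond2, cond1);
       if (and_tmp > 0)
	 return (void *) &version_decl;    // bb2, edge to EXIT
       // fall through to bb3, where the next condition can be added

   MIN_EXPR acts as a cheap logical AND over the integer predicate results:
   the minimum is positive only if every predicate returned a positive
   value.  */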
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_A,
    P_PROC_SSE4_A,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_PROC_AVX,
    P_FMA4,
    P_XOP,
    P_PROC_XOP,
    P_FMA,
    P_PROC_FMA,
    P_AVX2,
    P_PROC_AVX2
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */
  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"mmx", P_MMX},
      {"sse", P_SSE},
      {"sse2", P_SSE2},
      {"sse3", P_SSE3},
      {"sse4a", P_SSE4_A},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"avx", P_AVX},
      {"fma4", P_FMA4},
      {"xop", P_XOP},
      {"fma", P_FMA},
      {"avx2", P_AVX2}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_NEHALEM:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
		arg_str = "westmere";
	      else
		/* We translate "arch=corei7" and "arch=nehalem" to
		   "corei7" so that it will be mapped to M_INTEL_COREI7
		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
		arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_SANDYBRIDGE:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
		arg_str = "ivybridge";
	      else
		arg_str = "sandybridge";
	      priority = P_PROC_AVX;
	      break;
	    case PROCESSOR_HASWELL:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
		arg_str = "broadwell";
	      else
		arg_str = "haswell";
	      priority = P_PROC_AVX2;
	      break;
	    case PROCESSOR_BONNELL:
	      arg_str = "bonnell";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_SILVERMONT:
	      arg_str = "silvermont";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_A;
	      break;
	    case PROCESSOR_BTVER1:
	      arg_str = "btver1";
	      priority = P_PROC_SSE4_A;
	      break;
	    case PROCESSOR_BTVER2:
	      arg_str = "btver2";
	      priority = P_PROC_AVX;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_XOP;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER3:
	      arg_str = "bdver3";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER4:
	      arg_str = "bdver4";
	      priority = P_PROC_AVX2;
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch=".  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
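/* For reference (illustrative, not part of the original file): the strings
   parsed above come from function-multiversioning declarations such as

     __attribute__ ((target ("default")))      int foo (void);
     __attribute__ ((target ("sse4.2")))       int foo (void);
     __attribute__ ((target ("arch=haswell"))) int foo (void);

   "default" gets priority zero, "sse4.2" maps to a __builtin_cpu_supports
   predicate with priority P_SSE4_2, and "arch=haswell" maps to a
   __builtin_cpu_is predicate with priority P_PROC_AVX2, so the Haswell
   version is tested first at dispatch time.  */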
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}
/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
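/* Schematic shape of the resolver assembled above (hypothetical C, for
   illustration only; foo_* names are made up):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("haswell") > 0)       // highest priority first
	 return (void *) &foo_arch_haswell;
       if (__builtin_cpu_supports ("sse4.2") > 0)
	 return (void *) &foo_sse4_2;
       return (void *) &foo_default;               // appended last
     }  */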
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
/* ARGLIST is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.  It also
   replaces non-identifier characters "=,-" with "_".  */

static char *
sorted_attr_string (tree arglist)
{
  tree arg;
  size_t str_len_sum = 0;
  char **args = NULL;
  char *attr_str, *ret_str;
  char *attr = NULL;
  unsigned int argnum = 1;
  unsigned int i;

  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      str_len_sum += len + 1;
      if (arg != arglist)
	argnum++;
      for (i = 0; i < strlen (str); i++)
	if (str[i] == ',')
	  argnum++;
    }

  attr_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      memcpy (attr_str + str_len_sum, str, len);
      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
      str_len_sum += len + 1;
    }

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i]== '-')
      attr_str[i] = '_';

  if (argnum == 1)
    return attr_str;

  args = XNEWVEC (char *, argnum);

  i = 0;
  attr = strtok (attr_str, ",");
  while (attr != NULL)
    {
      args[i] = attr;
      i++;
      attr = strtok (NULL, ",");
    }

  qsort (args, argnum, sizeof (char *), attr_strcmp);

  ret_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (i = 0; i < argnum; i++)
    {
      size_t len = strlen (args[i]);
      memcpy (ret_str + str_len_sum, args[i], len);
      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
      str_len_sum += len + 1;
    }

  XDELETEVEC (args);
  XDELETEVEC (attr_str);
  return ret_str;
}
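/* Worked example (assumed behaviour, following the code above): for
   target ("sse4.2,arch=atom") the buffer first becomes "sse4.2,arch_atom"
   after the "=,-" replacement, tokenizes to { "sse4.2", "arch_atom" },
   sorts to { "arch_atom", "sse4.2" }, and the returned identifier is
   "arch_atom_sse4.2".  Note that '.' is deliberately left alone.  */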
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "Function versions cannot be marked as gnu_inline,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
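/* Example of the resulting mangling (illustrative): a function foo declared
   with target ("avx") is assembled as "foo.avx", while the "default"
   version keeps the plain name.  The '.' separator keeps the suffix
   demangler-friendly and outside the space of valid C identifiers, so the
   versioned symbols cannot collide with ordinary user symbols.  */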
/* This function returns true if FN1 and FN2 are versions of the same function,
   that is, the target strings of the function decls are different.  This
   assumes that FN1 and FN2 have the same signature.  */

static bool
ix86_function_versions (tree fn1, tree fn2)
{
  tree attr1, attr2;
  char *target1, *target2;
  bool result;

  if (TREE_CODE (fn1) != FUNCTION_DECL
      || TREE_CODE (fn2) != FUNCTION_DECL)
    return false;

  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));

  /* At least one function decl should have the target attribute specified.  */
  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
    return false;

  /* Diagnose missing target attribute if one of the decls is already
     multi-versioned.  */
  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
    {
      if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
	{
	  if (attr2 != NULL_TREE)
	    {
	      tree tem = fn1;
	      fn1 = fn2;
	      fn2 = tem;
	      attr1 = attr2;
	    }
	  error_at (DECL_SOURCE_LOCATION (fn2),
		    "missing %<target%> attribute for multi-versioned %D",
		    fn2);
	  inform (DECL_SOURCE_LOCATION (fn1),
		  "previous declaration of %D", fn1);
	  /* Prevent diagnosing of the same error multiple times.  */
	  DECL_ATTRIBUTES (fn2)
	    = tree_cons (get_identifier ("target"),
			 copy_node (TREE_VALUE (attr1)),
			 DECL_ATTRIBUTES (fn2));
	}
      return false;
    }

  target1 = sorted_attr_string (TREE_VALUE (attr1));
  target2 = sorted_attr_string (TREE_VALUE (attr2));

  /* The sorted target strings must be different for fn1 and fn2
     to be versions.  */
  if (strcmp (target1, target2) == 0)
    result = false;
  else
    result = true;

  XDELETEVEC (target1);
  XDELETEVEC (target2);

  return result;
}
static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
/* Return a new name by appending SUFFIX to the DECL name.  If make_unique
   is true, append the full path name of the source file.  */

static char *
make_name (tree decl, const char *suffix, bool make_unique)
{
  char *global_var_name;
  int name_len;
  const char *name;
  const char *unique_name = NULL;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Get a unique name that can be used globally without any chances
     of collision at link time.  */
  if (make_unique)
    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));

  name_len = strlen (name) + strlen (suffix) + 2;

  if (make_unique)
    name_len += strlen (unique_name) + 1;
  global_var_name = XNEWVEC (char, name_len);

  /* Use '.' to concatenate names as it is demangler friendly.  */
  if (make_unique)
    snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
	      suffix);
  else
    snprintf (global_var_name, name_len, "%s.%s", name, suffix);

  return global_var_name;
}
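/* Example outputs (illustrative): for DECL "foo" and SUFFIX "resolver",
   make_name returns "foo.resolver" when MAKE_UNIQUE is false, and
   "foo.<unique file-scope string>.resolver" when it is true, with
   get_file_function_name supplying the link-time unique component.  */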
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
/* Returns true if decl is multi-versioned and DECL is the default function,
   that is, it is not tagged with target specific optimization.  */

static bool
is_function_default_version (const tree decl)
{
  if (TREE_CODE (decl) != FUNCTION_DECL
      || !DECL_FUNCTION_VERSIONED (decl))
    return false;
  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attr);
  attr = TREE_VALUE (TREE_VALUE (attr));
  return (TREE_CODE (attr) == STRING_CST
	  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
}
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs ifunc which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
/* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */

static tree
make_attribute (const char *name, const char *arg_name, tree chain)
{
  tree attr_name;
  tree attr_arg_name;
  tree attr_args;
  tree attr;

  attr_name = get_identifier (name);
  attr_arg_name = build_string (strlen (arg_name), arg_name);
  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
  attr = tree_cons (attr_name, attr_args, chain);

  return attr;
}
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree dispatch_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;
  bool is_uniq = false;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    is_uniq = true;

  /* Append the filename to the resolver function if the versions are
     not externally visible.  This is because the resolver function has
     to be externally visible for the loader to find it.  So, appending
     the filename will prevent conflicts with a resolver function from
     another module which is based on the same version name.  */
  resolver_name = make_name (default_decl, "resolver", is_uniq);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  /* IFUNC resolvers have to be externally visible.  */
  TREE_PUBLIC (decl) = 1;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false);

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (dispatch_decl != NULL);
  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  /* Create the alias for dispatch to resolver here.  */
  /*cgraph_create_function_alias (dispatch_decl, decl);*/
  cgraph_node::create_same_body_alias (dispatch_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
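/* Net effect (illustrative): after this function, the dispatcher decl
   carries ifunc ("foo.resolver"), i.e. it behaves as if declared

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader invokes foo.resolver once and binds foo to the
   function version whose address the resolver returns.  */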
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier(name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_SSE4_A,
    F_FMA4,
    F_XOP,
    F_FMA,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_BONNELL,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_INTEL_SILVERMONT,
    M_AMD_BTVER1,
    M_AMD_BTVER2,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3,
    M_AMDFAM15H_BDVER4,
    M_INTEL_COREI7_IVYBRIDGE,
    M_INTEL_COREI7_HASWELL
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_BONNELL},
      {"slm", M_INTEL_SILVERMONT},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
      {"haswell", M_INTEL_COREI7_HASWELL},
      {"bonnell", M_INTEL_BONNELL},
      {"silvermont", M_INTEL_SILVERMONT},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"btver1", M_AMD_BTVER1},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
      {"bdver4", M_AMDFAM15H_BDVER4},
      {"btver2", M_AMD_BTVER2},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4a",  F_SSE4_A},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"fma4",   F_FMA4},
      {"xop",    F_XOP},
      {"fma",    F_FMA},
      {"avx2",   F_AVX2}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
					"__cpu_model");


  varpool_node::add (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRS leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN ( DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
}
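/* Usage sketch (illustrative): once __cpu_model has been filled in by
   __builtin_cpu_init (the resolver calls it explicitly), user code such as

     if (__builtin_cpu_is ("corei7"))
       ...
     if (__builtin_cpu_supports ("sse4.2"))
       ...

   folds, per the code above, to a compare of __cpu_model.__cpu_type against
   the rebased M_INTEL_COREI7 value and to a bit test of
   __cpu_model.__cpu_features[0] respectively, with no call left in the
   generated code.  */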
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
	{
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}
/* Make builtins to get CPU type and features supported.  The created
   builtins are :

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
   */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
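/* The comparison_p path above serves the XOP vpcom* builtins: for
   instance, __builtin_ia32_vpcomeqb is expanded (illustratively) with
   sub_code EQ, so the insn carries an explicit (eq:V16QI op0 op1)
   operand built by gen_rtx_fmt_ee rather than a numeric immediate.  */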
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
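/* OP1 is simply a copy of OP0 because the vec_merge patterns this
   helper serves, e.g. a scalar sqrt of roughly the shape
   (vec_merge (sqrt:V4SF OP0) OP1 (const_int 1)), take the untouched
   upper elements from a second input.  */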
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
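/* The SWAP parameter handles comparisons that the hardware only
   implements in one direction; e.g. a "greater than" builtin can be
   emitted as the available "less than" pattern with OP0 and OP1
   exchanged, while D->comparison still names the user-visible test.  */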
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
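/* comi/ucomi insns compare into EFLAGS rather than into a vector
   register, so the boolean result is materialized here by storing a
   setcc-style comparison of the flags into the QImode low part of a
   zeroed SImode pseudo and returning the containing SImode register.  */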
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
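/* ptest likewise only sets flags; D->comparison (e.g. EQ for the
   ptestz forms) selects which flag bit becomes the builtin's 0/1
   result.  */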
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
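/* For the pcmpestr family D->code selects the builtin variant: the
   index output (PCMPESTRI), the mask output (PCMPESTRM), or one of the
   flag outputs, in which case the mode of the flags register to test
   is carried in D->flag and recovered by the gen_rtx_REG call above.  */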
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  unsigned int mask_pos = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[6];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
    case V16SI_FTYPE_V8DF_V8DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
    case UINT_FTYPE_V2DF:
    case UINT_FTYPE_V4SF:
    case UINT64_FTYPE_V2DF:
    case UINT64_FTYPE_V4SF:
    case V16QI_FTYPE_V8DI:
    case V16HI_FTYPE_V16SI:
    case V16SI_FTYPE_HI:
    case V16SI_FTYPE_V16SI:
    case V16SI_FTYPE_INT:
    case V16SF_FTYPE_FLOAT:
    case V16SF_FTYPE_V8SF:
    case V16SI_FTYPE_V8SI:
    case V16SF_FTYPE_V4SF:
    case V16SI_FTYPE_V4SI:
    case V16SF_FTYPE_V16SF:
    case V8HI_FTYPE_V8DI:
    case V8UHI_FTYPE_V8UHI:
    case V8SI_FTYPE_V8DI:
    case V8USI_FTYPE_V8USI:
    case V8SF_FTYPE_V8DF:
    case V8DI_FTYPE_QI:
    case V8DI_FTYPE_INT64:
    case V8DI_FTYPE_V4DI:
    case V8DI_FTYPE_V8DI:
    case V8DF_FTYPE_DOUBLE:
    case V8DF_FTYPE_V4DF:
    case V8DF_FTYPE_V2DF:
    case V8DF_FTYPE_V8DF:
    case V8DF_FTYPE_V8SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V16SI_FTYPE_V16SI_V16SI:
    case V16SF_FTYPE_V16SF_V16SF:
    case V16SF_FTYPE_V16SF_V16SI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V8DI_FTYPE_V8DI_V8DI:
    case V8DF_FTYPE_V8DF_V8DF:
    case V8DF_FTYPE_V8DF_V8DI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_UINT:
    case V4SF_FTYPE_V4SF_UINT64:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2UDI_FTYPE_V4USI_V4USI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2DF_FTYPE_V2DF_UINT:
    case V2DF_FTYPE_V2DF_UINT64:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
    case V4UDI_FTYPE_V8USI_V8USI:
    case QI_FTYPE_V8DI_V8DI:
    case HI_FTYPE_V16SI_V16SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
    case HI_FTYPE_HI_HI:
    case V16SI_FTYPE_V8DF_V8DF:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V16HI_FTYPE_V16SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SF_FTYPE_V16SF_INT:
    case V16SF_FTYPE_V16SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4DF_FTYPE_V8DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V16SI_FTYPE_V16SI_INT:
    case V4SI_FTYPE_V16SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
    case V4DI_FTYPE_V8DI_INT:
    case HI_FTYPE_HI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
    case HI_FTYPE_V16SI_V16SI_HI:
    case QI_FTYPE_V8DI_V8DI_QI:
    case V16HI_FTYPE_V16SI_V16HI_HI:
    case V16QI_FTYPE_V16SI_V16QI_HI:
    case V16QI_FTYPE_V8DI_V16QI_QI:
    case V16SF_FTYPE_V16SF_V16SF_HI:
    case V16SF_FTYPE_V16SF_V16SF_V16SF:
    case V16SF_FTYPE_V16SF_V16SI_V16SF:
    case V16SF_FTYPE_V16SI_V16SF_HI:
    case V16SF_FTYPE_V16SI_V16SF_V16SF:
    case V16SF_FTYPE_V4SF_V16SF_HI:
    case V16SI_FTYPE_SI_V16SI_HI:
    case V16SI_FTYPE_V16HI_V16SI_HI:
    case V16SI_FTYPE_V16QI_V16SI_HI:
    case V16SI_FTYPE_V16SF_V16SI_HI:
    case V16SI_FTYPE_V16SI_V16SI_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI:
    case V16SI_FTYPE_V4SI_V16SI_HI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI:
    case V8DF_FTYPE_V2DF_V8DF_QI:
    case V8DF_FTYPE_V4DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DF_V8DF:
    case V8DF_FTYPE_V8DF_V8DI_V8DF:
    case V8DF_FTYPE_V8DI_V8DF_V8DF:
    case V8DF_FTYPE_V8SF_V8DF_QI:
    case V8DF_FTYPE_V8SI_V8DF_QI:
    case V8DI_FTYPE_DI_V8DI_QI:
    case V8DI_FTYPE_V16QI_V8DI_QI:
    case V8DI_FTYPE_V2DI_V8DI_QI:
    case V8DI_FTYPE_V4DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_V8DI:
    case V8DI_FTYPE_V8HI_V8DI_QI:
    case V8DI_FTYPE_V8SI_V8DI_QI:
    case V8HI_FTYPE_V8DI_V8HI_QI:
    case V8SF_FTYPE_V8DF_V8SF_QI:
    case V8SI_FTYPE_V8DF_V8SI_QI:
    case V8SI_FTYPE_V8DI_V8SI_QI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V16SF_FTYPE_V16SF_V16SF_INT:
    case V16SF_FTYPE_V16SF_V4SF_INT:
    case V16SI_FTYPE_V16SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
    case QI_FTYPE_V8DI_V8DI_INT:
    case QI_FTYPE_V8DF_V8DF_INT:
    case QI_FTYPE_V2DF_V2DF_INT:
    case QI_FTYPE_V4SF_V4SF_INT:
    case HI_FTYPE_V16SI_V16SI_INT:
    case HI_FTYPE_V16SF_V16SF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
    case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
    case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
    case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
    case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
    case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
    case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
    case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
    case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
      nargs = 4;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case QI_FTYPE_V2DF_V2DF_INT_QI:
    case QI_FTYPE_V4SF_V4SF_INT_QI:
      nargs = 4;
      mask_pos = 1;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
      nargs = 4;
      break;
    case QI_FTYPE_V8DI_V8DI_INT_QI:
    case HI_FTYPE_V16SI_V16SI_INT_HI:
    case QI_FTYPE_V8DF_V8DF_INT_QI:
    case HI_FTYPE_V16SF_V16SF_INT_HI:
      mask_pos = 1;
      nargs = 4;
      nargs_constant = 1;
      break;
    case V8DF_FTYPE_V8DF_INT_V8DF_QI:
    case V16SF_FTYPE_V16SF_INT_V16SF_HI:
    case V16HI_FTYPE_V16SF_INT_V16HI_HI:
    case V16SI_FTYPE_V16SI_INT_V16SI_HI:
    case V4SI_FTYPE_V16SI_INT_V4SI_QI:
    case V4DI_FTYPE_V8DI_INT_V4DI_QI:
    case V4DF_FTYPE_V8DF_INT_V4DF_QI:
    case V4SF_FTYPE_V16SF_INT_V4SF_QI:
    case V8DI_FTYPE_V8DI_INT_V8DI_QI:
      nargs = 4;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
    case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
    case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
    case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
    case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
      nargs = 5;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
      nargs = 5;
      mask_pos = 1;
      nargs_constant = 1;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      real_target = gen_reg_rtx (tmode);
      target = simplify_gen_subreg (rmode, real_target, tmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
               (!mask_pos && (nargs - i) <= nargs_constant))
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_avx_vinsertf128v4di:
              case CODE_FOR_avx_vextractf128v4di:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx512f_cmpv8di3_mask:
              case CODE_FOR_avx512f_cmpv16si3_mask:
              case CODE_FOR_avx512f_ucmpv8di3_mask:
              case CODE_FOR_avx512f_ucmpv16si3_mask:
                error ("the last argument must be a 3-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:

              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:

              case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
              case CODE_FOR_sse4_1_roundps_sfix:
              case CODE_FOR_avx_roundpd_vec_pack_sfix256:
              case CODE_FOR_avx_roundps_sfix256:

              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
              case CODE_FOR_avx512f_getmantv8df_mask:
              case CODE_FOR_avx512f_getmantv16sf_mask:
              case CODE_FOR_avx512vl_getmantv8sf_mask:
              case CODE_FOR_avx512vl_getmantv4df_mask:
              case CODE_FOR_avx512vl_getmantv4sf_mask:
              case CODE_FOR_avx512vl_getmantv2df_mask:
              case CODE_FOR_avx512dq_rangepv8df_mask_round:
              case CODE_FOR_avx512dq_rangepv16sf_mask_round:
              case CODE_FOR_avx512dq_rangepv4df_mask:
              case CODE_FOR_avx512dq_rangepv8sf_mask:
              case CODE_FOR_avx512dq_rangepv2df_mask:
              case CODE_FOR_avx512dq_rangepv4sf_mask:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sha1rnds4:
              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
              case CODE_FOR_avx512f_vinsertf32x4_mask:
              case CODE_FOR_avx512f_vinserti32x4_mask:
              case CODE_FOR_avx512f_vextractf32x4_mask:
              case CODE_FOR_avx512f_vextracti32x4_mask:
              case CODE_FOR_sse2_shufpd:
              case CODE_FOR_sse2_shufpd_mask:
              case CODE_FOR_avx512dq_shuf_f64x2_mask:
              case CODE_FOR_avx512dq_shuf_i64x2_mask:
              case CODE_FOR_avx512vl_shuf_i32x4_mask:
              case CODE_FOR_avx512vl_shuf_f32x4_mask:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
              case CODE_FOR_avx512f_vinsertf64x4_mask:
              case CODE_FOR_avx512f_vinserti64x4_mask:
              case CODE_FOR_avx512f_vextractf64x4_mask:
              case CODE_FOR_avx512f_vextracti64x4_mask:
              case CODE_FOR_avx512dq_vinsertf32x8_mask:
              case CODE_FOR_avx512dq_vinserti32x8_mask:
              case CODE_FOR_avx512vl_vinsertv4df:
              case CODE_FOR_avx512vl_vinsertv4di:
              case CODE_FOR_avx512vl_vinsertv8sf:
              case CODE_FOR_avx512vl_vinsertv8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vmcmpv2df3:
              case CODE_FOR_avx_vmcmpv4sf3:
              case CODE_FOR_avx_cmpv2df3:
              case CODE_FOR_avx_cmpv4sf3:
              case CODE_FOR_avx_cmpv4df3:
              case CODE_FOR_avx_cmpv8sf3:
              case CODE_FOR_avx512f_cmpv8df3_mask:
              case CODE_FOR_avx512f_cmpv16sf3_mask:
              case CODE_FOR_avx512f_vmcmpv2df3_mask:
              case CODE_FOR_avx512f_vmcmpv4sf3_mask:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
                        (!mask_pos && (nargs - i) == nargs_constant))
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    case 5:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op);
      break;
    case 6:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op,
                             args[5].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
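/* The ix86_builtin_func_type encoding drives the whole switch above:
   the name lists the return type followed by the argument types, so
   e.g. V8SF_FTYPE_V8SF_V8SI_INT implies nargs = 3 with the trailing
   INT required to be an immediate (nargs_constant = 1), and a _COUNT
   or _SWAP suffix selects the shift-count or operand-swap handling.  */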
/* Transform pattern of following layout:
     (parallel [
	set (A B)
	(unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
   into:
     (set (A B))

   Or:
     (parallel [ A B
     ...
     (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
     ...
     ])
   into:
     (parallel [ A B ... ])  */

static rtx
ix86_erase_embedded_rounding (rtx pat)
{
  if (GET_CODE (pat) == INSN)
    pat = PATTERN (pat);

  gcc_assert (GET_CODE (pat) == PARALLEL);

  if (XVECLEN (pat, 0) == 2)
    {
      rtx p0 = XVECEXP (pat, 0, 0);
      rtx p1 = XVECEXP (pat, 0, 1);

      gcc_assert (GET_CODE (p0) == SET
                  && GET_CODE (p1) == UNSPEC
                  && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);

      return p0;
    }
  else
    {
      rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
      int i = 0;
      int j = 0;

      for (; i < XVECLEN (pat, 0); ++i)
        {
          rtx elem = XVECEXP (pat, 0, i);
          if (GET_CODE (elem) != UNSPEC
              || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
            res[j++] = elem;
        }

      /* No more than 1 occurrence was removed.  */
      gcc_assert (j >= XVECLEN (pat, 0) - 1);

      return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
    }
}
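/* As a sketch, an AVX-512 insn with embedded rounding is first emitted
   as a PARALLEL of the arithmetic SET plus an
   (unspec [...] UNSPEC_EMBEDDED_ROUNDING) marker; when the requested
   rounding turns out to be redundant, the marker is erased here and
   the plain SET that the normal pattern expects remains.  */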
/* Subroutine of ix86_expand_round_builtin to take care of comi insns
   with rounding.  */

static rtx
ix86_expand_sse_comi_round (const struct builtin_description *d,
                            tree exp, rtx target)
{
  rtx pat, set_dst;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode mode0 = insn_p->operand[0].mode;
  enum machine_mode mode1 = insn_p->operand[1].mode;
  enum rtx_code comparison = UNEQ;
  bool need_ucomi = false;

  /* See avxintrin.h for values.  */
  enum rtx_code comi_comparisons[32] =
    {
      UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
      UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
      UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
    };
  bool need_ucomi_values[32] =
    {
      true,  false, false, true,  true,  false, false, true,
      true,  false, false, true,  true,  false, false, true,
      false, true,  true,  false, false, true,  true,  false,
      false, true,  true,  false, false, true,  true,  false
    };

  if (!CONST_INT_P (op2))
    {
      error ("the third argument must be a comparison constant");
      return const0_rtx;
    }
  if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
    {
      error ("incorrect comparison mode");
      return const0_rtx;
    }

  if (!insn_p->operand[2].predicate (op3, SImode))
    {
      error ("incorrect rounding operand");
      return const0_rtx;
    }

  comparison = comi_comparisons[INTVAL (op2)];
  need_ucomi = need_ucomi_values[INTVAL (op2)];

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_p->operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_p->operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  if (need_ucomi)
    icode = icode == CODE_FOR_sse_comi_round
                     ? CODE_FOR_sse_ucomi_round
                     : CODE_FOR_sse2_ucomi_round;

  pat = GEN_FCN (icode) (op0, op1, op3);
  if (! pat)
    return 0;

  /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
  if (INTVAL (op3) == NO_ROUND)
    {
      pat = ix86_erase_embedded_rounding (pat);
      if (! pat)
        return 0;

      set_dst = SET_DEST (pat);
    }
  else
    {
      gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
      set_dst = SET_DEST (XVECEXP (pat, 0, 0));
    }

  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          set_dst,
                                          const0_rtx)));

  return SUBREG_REG (target);
}
static rtx
ix86_expand_round_builtin (const struct builtin_description *d,
                           tree exp, rtx target)
{
  rtx pat;
  unsigned int i, nargs;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[6];
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  unsigned int nargs_constant = 0;
  unsigned int redundant_embed_rnd = 0;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case UINT64_FTYPE_V2DF_INT:
    case UINT64_FTYPE_V4SF_INT:
    case UINT_FTYPE_V2DF_INT:
    case UINT_FTYPE_V4SF_INT:
    case INT64_FTYPE_V2DF_INT:
    case INT64_FTYPE_V4SF_INT:
    case INT_FTYPE_V2DF_INT:
    case INT_FTYPE_V4SF_INT:
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_UINT_INT:
    case V4SF_FTYPE_V4SF_UINT64_INT:
    case V2DF_FTYPE_V2DF_UINT64_INT:
    case V4SF_FTYPE_V4SF_INT_INT:
    case V4SF_FTYPE_V4SF_INT64_INT:
    case V2DF_FTYPE_V2DF_INT64_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V2DF_INT:
    case V2DF_FTYPE_V2DF_V4SF_INT:
      nargs = 3;
      break;
    case V8SF_FTYPE_V8DF_V8SF_QI_INT:
    case V8DF_FTYPE_V8DF_V8DF_QI_INT:
    case V8SI_FTYPE_V8DF_V8SI_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_HI_INT:
    case V16SF_FTYPE_V16SI_V16SF_HI_INT:
    case V16SI_FTYPE_V16SF_V16SI_HI_INT:
    case V8DF_FTYPE_V8SF_V8DF_QI_INT:
    case V16SF_FTYPE_V16HI_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
      nargs = 4;
      break;
    case V4SF_FTYPE_V4SF_V4SF_INT_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_INT:
      nargs_constant = 2;
      nargs = 4;
      break;
    case INT_FTYPE_V4SF_V4SF_INT_INT:
    case INT_FTYPE_V2DF_V2DF_INT_INT:
      return ix86_expand_sse_comi_round (d, exp, target);
    case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
      nargs = 5;
      break;
    case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
      nargs_constant = 4;
      nargs = 5;
      break;
    case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
    case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
    case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
    case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
      nargs_constant = 3;
      nargs = 5;
      break;
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
      nargs = 6;
      nargs_constant = 4;
      break;
    case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
      nargs = 6;
      nargs_constant = 3;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (optimize
      || target == 0
      || GET_MODE (target) != tmode
      || !insn_p->operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (i == nargs - nargs_constant)
        {
          if (!match)
            {
              switch (icode)
                {
                case CODE_FOR_avx512f_getmantv8df_mask_round:
                case CODE_FOR_avx512f_getmantv16sf_mask_round:
                case CODE_FOR_avx512f_vgetmantv2df_round:
                case CODE_FOR_avx512f_vgetmantv4sf_round:
                  error ("the immediate argument must be a 4-bit immediate");
                  return const0_rtx;
                case CODE_FOR_avx512f_cmpv8df3_mask_round:
                case CODE_FOR_avx512f_cmpv16sf3_mask_round:
                case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
                case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
                  error ("the immediate argument must be a 5-bit immediate");
                  return const0_rtx;
                default:
                  error ("the immediate argument must be an 8-bit immediate");
                  return const0_rtx;
                }
            }
        }
      else if (i == nargs - 1)
        {
          if (!insn_p->operand[nargs].predicate (op, SImode))
            {
              error ("incorrect rounding operand");
              return const0_rtx;
            }

          /* If there is no rounding use normal version of the pattern.  */
          if (INTVAL (op) == NO_ROUND)
            redundant_embed_rnd = 1;
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    case 5:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op);
      break;
    case 6:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op,
                             args[5].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  if (redundant_embed_rnd)
    pat = ix86_erase_embedded_rounding (pat);

  emit_insn (pat);
  return target;
}
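/* The last builtin argument in this family is always the rounding
   immediate; when it is NO_ROUND the embedded-rounding marker the
   pattern would otherwise carry is redundant and is stripped via
   ix86_erase_embedded_rounding before the insn is emitted.  */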
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  bool aligned_mem = false;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;

    case INT_FTYPE_VOID:
    case USHORT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
    case V16SI_FTYPE_PV4SI:
    case V16SF_FTYPE_PV4SF:
    case V8DI_FTYPE_PV4DI:
    case V8DI_FTYPE_PV8DI:
    case V8DF_FTYPE_PV4DF:
      nargs = 1;
      klass = load;
      memory = 0;
      switch (icode)
        {
        case CODE_FOR_sse4_1_movntdqa:
        case CODE_FOR_avx2_movntdqa:
        case CODE_FOR_avx512f_movntdqa:
          aligned_mem = true;
          break;
        default:
          break;
        }
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV8DI_V8DI:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V16SF:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V8DF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      switch (icode)
        {
        /* These builtins and instructions require the memory
           to be properly aligned.  */
        case CODE_FOR_avx_movntv4di:
        case CODE_FOR_sse2_movntv2di:
        case CODE_FOR_avx_movntv8sf:
        case CODE_FOR_sse_movntv4sf:
        case CODE_FOR_sse4a_vmmovntv4sf:
        case CODE_FOR_avx_movntv4df:
        case CODE_FOR_sse2_movntv2df:
        case CODE_FOR_sse4a_vmmovntv2df:
        case CODE_FOR_sse2_movntidi:
        case CODE_FOR_sse_movntq:
        case CODE_FOR_sse2_movntisi:
        case CODE_FOR_avx512f_movntv16sf:
        case CODE_FOR_avx512f_movntv8df:
        case CODE_FOR_avx512f_movntv8di:
          aligned_mem = true;
          break;
        default:
          break;
        }
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8DF_V8DF_QI:
    case VOID_FTYPE_PV16SF_V16SF_HI:
    case VOID_FTYPE_PV8DI_V8DI_QI:
    case VOID_FTYPE_PV16SI_V16SI_HI:
      switch (icode)
        {
        /* These builtins and instructions require the memory
           to be properly aligned.  */
        case CODE_FOR_avx512f_storev16sf_mask:
        case CODE_FOR_avx512f_storev16si_mask:
        case CODE_FOR_avx512f_storev8df_mask:
        case CODE_FOR_avx512f_storev8di_mask:
        case CODE_FOR_avx512vl_storev8sf_mask:
        case CODE_FOR_avx512vl_storev8si_mask:
        case CODE_FOR_avx512vl_storev4df_mask:
        case CODE_FOR_avx512vl_storev4di_mask:
        case CODE_FOR_avx512vl_storev4sf_mask:
        case CODE_FOR_avx512vl_storev4si_mask:
        case CODE_FOR_avx512vl_storev2df_mask:
        case CODE_FOR_avx512vl_storev2di_mask:
          aligned_mem = true;
          break;
        default:
          break;
        }
      /* FALLTHRU */
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
    case VOID_FTYPE_PDOUBLE_V2DF_QI:
    case VOID_FTYPE_PFLOAT_V4SF_QI:
    case VOID_FTYPE_PV8SI_V8DI_QI:
    case VOID_FTYPE_PV8HI_V8DI_QI:
    case VOID_FTYPE_PV16HI_V16SI_HI:
    case VOID_FTYPE_PV16QI_V8DI_QI:
    case VOID_FTYPE_PV16QI_V16SI_HI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V16SF_FTYPE_PCV16SF_V16SF_HI:
    case V16SI_FTYPE_PCV16SI_V16SI_HI:
    case V8DF_FTYPE_PCV8DF_V8DF_QI:
    case V8DI_FTYPE_PCV8DI_V8DI_QI:
    case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
    case V4SF_FTYPE_PCFLOAT_V4SF_QI:
      nargs = 3;
      klass = load;
      memory = 0;
      switch (icode)
        {
        /* These builtins and instructions require the memory
           to be properly aligned.  */
        case CODE_FOR_avx512f_loadv16sf_mask:
        case CODE_FOR_avx512f_loadv16si_mask:
        case CODE_FOR_avx512f_loadv8df_mask:
        case CODE_FOR_avx512f_loadv8di_mask:
        case CODE_FOR_avx512vl_loadv8sf_mask:
        case CODE_FOR_avx512vl_loadv8si_mask:
        case CODE_FOR_avx512vl_loadv4df_mask:
        case CODE_FOR_avx512vl_loadv4di_mask:
        case CODE_FOR_avx512vl_loadv4sf_mask:
        case CODE_FOR_avx512vl_loadv4si_mask:
        case CODE_FOR_avx512vl_loadv2df_mask:
        case CODE_FOR_avx512vl_loadv2di_mask:
        case CODE_FOR_avx512bw_loadv64qi_mask:
        case CODE_FOR_avx512vl_loadv32qi_mask:
        case CODE_FOR_avx512vl_loadv16qi_mask:
        case CODE_FOR_avx512bw_loadv32hi_mask:
        case CODE_FOR_avx512vl_loadv16hi_mask:
        case CODE_FOR_avx512vl_loadv8hi_mask:
          aligned_mem = true;
          break;
        default:
          break;
        }
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        {
          op = ix86_zero_extend_to_Pmode (op);
          target = gen_rtx_MEM (tmode, op);
          /* target at this point has just BITS_PER_UNIT MEM_ALIGN
             on it.  Try to improve it using get_pointer_alignment,
             and if the special builtin is one that requires strict
             mode alignment, also from its GET_MODE_ALIGNMENT.
             Failure to do so could lead to ix86_legitimate_combined_insn
             rejecting all changes to such insns.  */
          unsigned int align = get_pointer_alignment (arg);
          if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
            align = GET_MODE_ALIGNMENT (tmode);
          if (MEM_ALIGN (target) < align)
            set_mem_align (target, align);
        }
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || !register_operand (target, tmode)
          || GET_MODE (target) != tmode)
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              op = ix86_zero_extend_to_Pmode (op);
              op = gen_rtx_MEM (mode, op);
              /* op at this point has just BITS_PER_UNIT MEM_ALIGN
                 on it.  Try to improve it using get_pointer_alignment,
                 and if the special builtin is one that requires strict
                 mode alignment, also from its GET_MODE_ALIGNMENT.
                 Failure to do so could lead to ix86_legitimate_combined_insn
                 rejecting all changes to such insns.  */
              unsigned int align = get_pointer_alignment (arg);
              if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
                align = GET_MODE_ALIGNMENT (mode);
              if (MEM_ALIGN (op) < align)
                set_mem_align (op, align);
            }
          else
            {
              /* This must be a register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
                op = copy_to_mode_reg (mode, op);
              else
                {
                  op = copy_to_reg (op);
                  op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
                }
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
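/* "Special" builtins are the ones with a raw memory operand: for a
   store, operand 0 of the insn is a MEM built from the first call
   argument; for a load, the MEM is the input selected by MEMORY.  The
   alignment fixups above keep ix86_legitimate_combined_insn from
   rejecting combine's changes to such insns.  */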
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!tree_fits_uhwi_p (arg)
      || (elt = tree_to_uhwi (arg), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */
34989 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
34991 enum machine_mode tmode
= TYPE_MODE (type
);
34992 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
34993 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
34994 rtvec v
= rtvec_alloc (n_elt
);
34996 gcc_assert (VECTOR_MODE_P (tmode
));
34997 gcc_assert (call_expr_nargs (exp
) == n_elt
);
34999 for (i
= 0; i
< n_elt
; ++i
)
35001 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
35002 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
35005 if (!target
|| !register_operand (target
, tmode
))
35006 target
= gen_reg_rtx (tmode
);
35008 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
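/* Usage sketch (illustrative; assumes GCC's mmintrin.h wrappers, not part
   of this file).  With -mmmx, user code such as

     #include <mmintrin.h>
     __m64 pack2 (int hi, int lo) { return _mm_set_pi32 (hi, lo); }

   goes through __builtin_ia32_vec_init_v2si and is expanded by the
   function above rather than by generic vec_init patterns.  */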
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
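/* Usage sketch (illustrative): a call such as

     __v4sf v;
     float f = __builtin_ia32_vec_ext_v4sf (v, 2);

   is expanded by the function above; get_element_number rejects any
   selector outside 0..3 with the "selector must be an integer constant"
   diagnostic.  */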
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
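/* Usage sketch (illustrative): _mm_insert_epi16 from emmintrin.h reaches
   this expander via __builtin_ia32_vec_set_v8hi; because the input vector
   is first copied into a fresh register, the user's source operand is
   never modified in place.  */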
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
		     enum machine_mode mode, int ignore)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
	/* Make it call __cpu_indicator_init in libgcc.  */
	tree call_expr, fndecl, type;
	type = build_function_type_list (integer_type_node, NULL_TREE);
	fndecl = build_fn_decl ("__cpu_indicator_init", type);
	call_expr = build_call_expr (fndecl, 0);
	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
	gcc_assert (fold_expr != NULL_TREE);
	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    default:
      break;
    }

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	}
      return const0_rtx;
    }
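/* Usage sketch (illustrative): the folds above back the documented
   interface

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();             (use_avx2_path is hypothetical)

   and the ISA check just above is what rejects, at expansion time, a
   builtin used inside a function whose target flags do not enable the
   required extension ("%qE needs isa option %s").  */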
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = ix86_zero_extend_to_Pmode (op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_CLFLUSHOPT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_clflushopt;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_clflushopt (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
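    /* Usage sketch (illustrative): with -msse3, pmmintrin.h maps
       _mm_monitor (addr, ext, hints) and _mm_mwait (ext, hints) onto the
       two cases above; the address is zero-extended to Pmode and the hint
       words are forced into SImode registers before the insns are
       emitted.  */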
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:

      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
	{
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op2 = expand_normal (arg0);
	  if (!register_operand (op2, SImode))
	    op2 = copy_to_mode_reg (SImode, op2);

	  insn = (TARGET_64BIT
		  ? gen_rdpmc_rex64 (op0, op1, op2)
		  : gen_rdpmc (op0, op2));
	  emit_insn (insn);
	}
      else if (fcode == IX86_BUILTIN_RDTSC)
	{
	  insn = (TARGET_64BIT
		  ? gen_rdtsc_rex64 (op0, op1)
		  : gen_rdtsc (op0));
	  emit_insn (insn);
	}
      else
	{
	  op2 = gen_reg_rtx (SImode);

	  insn = (TARGET_64BIT
		  ? gen_rdtscp_rex64 (op0, op1, op2)
		  : gen_rdtscp (op0, op2));
	  emit_insn (insn);

	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op4 = expand_normal (arg0);
	  if (!address_operand (op4, VOIDmode))
	    {
	      op4 = convert_memory_address (Pmode, op4);
	      op4 = copy_addr_to_reg (op4);
	    }
	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
	}

      if (target == 0)
	{
	  /* mode is VOIDmode if __builtin_rd* has been called
	     without lhs.  */
	  if (mode == VOIDmode)
	    return target;
	  target = gen_reg_rtx (mode);
	}

      if (TARGET_64BIT)
	{
	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
				     op1, 1, OPTAB_DIRECT);
	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
				     op0, 1, OPTAB_DIRECT);
	}

      emit_move_insn (target, op0);
      return target;
    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
    case IX86_BUILTIN_FNSTENV:
    case IX86_BUILTIN_FLDENV:
      mode0 = BLKmode;
      switch (fcode)
	{
	case IX86_BUILTIN_FXSAVE:
	  icode = CODE_FOR_fxsave;
	  break;
	case IX86_BUILTIN_FXRSTOR:
	  icode = CODE_FOR_fxrstor;
	  break;
	case IX86_BUILTIN_FXSAVE64:
	  icode = CODE_FOR_fxsave64;
	  break;
	case IX86_BUILTIN_FXRSTOR64:
	  icode = CODE_FOR_fxrstor64;
	  break;
	case IX86_BUILTIN_FNSTENV:
	  icode = CODE_FOR_fnstenv;
	  break;
	case IX86_BUILTIN_FLDENV:
	  icode = CODE_FOR_fldenv;
	  break;
	default:
	  gcc_unreachable ();
	}

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (mode0, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
    case IX86_BUILTIN_XSAVES:
    case IX86_BUILTIN_XRSTORS:
    case IX86_BUILTIN_XSAVES64:
    case IX86_BUILTIN_XRSTORS64:
    case IX86_BUILTIN_XSAVEC:
    case IX86_BUILTIN_XSAVEC64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
	{
	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave_rex64;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor_rex64;
	      break;
	    case IX86_BUILTIN_XSAVE64:
	      icode = CODE_FOR_xsave64;
	      break;
	    case IX86_BUILTIN_XRSTOR64:
	      icode = CODE_FOR_xrstor64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT64:
	      icode = CODE_FOR_xsaveopt64;
	      break;
	    case IX86_BUILTIN_XSAVES:
	      icode = CODE_FOR_xsaves_rex64;
	      break;
	    case IX86_BUILTIN_XRSTORS:
	      icode = CODE_FOR_xrstors_rex64;
	      break;
	    case IX86_BUILTIN_XSAVES64:
	      icode = CODE_FOR_xsaves64;
	      break;
	    case IX86_BUILTIN_XRSTORS64:
	      icode = CODE_FOR_xrstors64;
	      break;
	    case IX86_BUILTIN_XSAVEC:
	      icode = CODE_FOR_xsavec_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEC64:
	      icode = CODE_FOR_xsavec64;
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  op2 = gen_lowpart (SImode, op2);
	  op1 = gen_lowpart (SImode, op1);
	  pat = GEN_FCN (icode) (op0, op1, op2);
	}
      else
	{
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt;
	      break;
	    case IX86_BUILTIN_XSAVES:
	      icode = CODE_FOR_xsaves;
	      break;
	    case IX86_BUILTIN_XRSTORS:
	      icode = CODE_FOR_xrstors;
	      break;
	    case IX86_BUILTIN_XSAVEC:
	      icode = CODE_FOR_xsavec;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  pat = GEN_FCN (icode) (op0, op1);
	}

      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0
	  || !register_operand (target, SImode))
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
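    /* Usage sketch (illustrative): with -mrdrnd, immintrin.h wraps this as

	 unsigned int r;
	 int ok = _rdrand32_step (&r);

       The conditional move emitted above yields the documented status
       value: 1 when the hardware set CF (success), otherwise the value
       rdrand left in the destination, which is zero on failure.  */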
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

    rdseed_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));

      if (target == 0
	  || !register_operand (target, SImode))
	target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;
    case IX86_BUILTIN_SBB32:
      icode = CODE_FOR_subsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_SBB64:
      icode = CODE_FOR_subdi3_carry;
      mode0 = DImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX32:
      icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
      mode0 = DImode;

    addcarryx:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op0 = gen_reg_rtx (QImode);

      /* Generate CF from input operand.  */
      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
      emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));

      /* Gen ADCX instruction to compute X+Y+CF.  */
      op2 = expand_normal (arg1);
      op3 = expand_normal (arg2);

      if (!REG_P (op2))
	op2 = copy_to_mode_reg (mode0, op2);
      if (!REG_P (op3))
	op3 = copy_to_mode_reg (mode0, op3);

      op0 = gen_reg_rtx (mode0);

      op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));

      /* Store the result.  */
      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
	{
	  op4 = convert_memory_address (Pmode, op4);
	  op4 = copy_addr_to_reg (op4);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      /* Return current CF value.  */
      if (target == 0)
	target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
      return target;
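    /* Usage sketch (illustrative): with -madx, adxintrin.h wraps the
       32-bit case as

	 unsigned int sum;
	 unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

       Adding -1 to the carry byte above sets CF exactly when the byte is
       nonzero, so the carry-propagating add sees the caller's carry-in,
       and the final SET returns the carry-out as a QImode value.  */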
    case IX86_BUILTIN_READ_FLAGS:
      emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));

      if (optimize
	  || target == NULL_RTX
	  || !nonimmediate_operand (target, word_mode)
	  || GET_MODE (target) != word_mode)
	target = gen_reg_rtx (word_mode);

      emit_insn (gen_pop (target));
      return target;

    case IX86_BUILTIN_WRITE_FLAGS:

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      if (!general_no_elim_operand (op0, word_mode))
	op0 = copy_to_mode_reg (word_mode, op0);

      emit_insn (gen_push (op0));
      emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
      return 0;
    case IX86_BUILTIN_KORTESTC16:
      icode = CODE_FOR_kortestchi;
      mode0 = HImode;
      mode1 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTZ16:
      icode = CODE_FOR_kortestzhi;
      mode0 = HImode;
      mode1 = CCZmode;

    kortest:
      arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2.  */
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      op0 = copy_to_reg (op0);
      op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
      op1 = copy_to_reg (op1);
      op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);

      target = gen_reg_rtx (QImode);
      emit_insn (gen_rtx_SET (mode0, target, const0_rtx));

      /* Emit kortest.  */
      emit_insn (GEN_FCN (icode) (op0, op1));
      /* And use setcc to return result from flags.  */
      ix86_expand_setcc (target, EQ,
			 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
      return target;
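    /* Usage sketch (illustrative): with -mavx512f these cases back the
       mask helpers in avx512fintrin.h (e.g. _mm512_kortestc via
       __builtin_ia32_kortestchi): both mask values are funneled into
       HImode subregs, kortest sets the flags, and setcc materializes the
       0/1 answer.  */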
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV16SF:
      icode = CODE_FOR_avx512f_gathersiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8DF:
      icode = CODE_FOR_avx512f_gathersiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV16SF:
      icode = CODE_FOR_avx512f_gatherdiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8DF:
      icode = CODE_FOR_avx512f_gatherdiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV16SI:
      icode = CODE_FOR_avx512f_gathersiv16si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8DI:
      icode = CODE_FOR_avx512f_gathersiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV16SI:
      icode = CODE_FOR_avx512f_gatherdiv16si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8DI:
      icode = CODE_FOR_avx512f_gatherdiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV8DF:
      icode = CODE_FOR_avx512f_gathersiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV16SF:
      icode = CODE_FOR_avx512f_gatherdiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV8DI:
      icode = CODE_FOR_avx512f_gathersiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV16SI:
      icode = CODE_FOR_avx512f_gatherdiv16si;
      goto gather_gen;
    case IX86_BUILTIN_SCATTERSIV16SF:
      icode = CODE_FOR_avx512f_scattersiv16sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8DF:
      icode = CODE_FOR_avx512f_scattersiv8df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV16SF:
      icode = CODE_FOR_avx512f_scatterdiv16sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8DF:
      icode = CODE_FOR_avx512f_scatterdiv8df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV16SI:
      icode = CODE_FOR_avx512f_scattersiv16si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8DI:
      icode = CODE_FOR_avx512f_scattersiv8di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV16SI:
      icode = CODE_FOR_avx512f_scatterdiv16si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8DI:
      icode = CODE_FOR_avx512f_scatterdiv8di;
      goto scatter_gen;

    case IX86_BUILTIN_GATHERPFDPD:
      icode = CODE_FOR_avx512pf_gatherpfv8sidf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_GATHERPFDPS:
      icode = CODE_FOR_avx512pf_gatherpfv16sisf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_GATHERPFQPD:
      icode = CODE_FOR_avx512pf_gatherpfv8didf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_GATHERPFQPS:
      icode = CODE_FOR_avx512pf_gatherpfv8disf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFDPD:
      icode = CODE_FOR_avx512pf_scatterpfv8sidf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFDPS:
      icode = CODE_FOR_avx512pf_scatterpfv16sisf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFQPD:
      icode = CODE_FOR_avx512pf_scatterpfv8didf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFQPS:
      icode = CODE_FOR_avx512pf_scatterpfv8disf;
      goto vec_prefetch_gen;
    gather_gen:
      rtx half;
      rtx (*gen) (rtx, rtx);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode
	  || !insn_data[icode].operand[0].predicate (target,
						     GET_MODE (target)))
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      switch (fcode)
	{
	case IX86_BUILTIN_GATHER3ALTSIV8DF:
	case IX86_BUILTIN_GATHER3ALTSIV8DI:
	  half = gen_reg_rtx (V8SImode);
	  if (!nonimmediate_operand (op2, V16SImode))
	    op2 = copy_to_mode_reg (V16SImode, op2);
	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
	  op2 = half;
	  break;
	case IX86_BUILTIN_GATHERALTSIV4DF:
	case IX86_BUILTIN_GATHERALTSIV4DI:
	  half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	  break;
	case IX86_BUILTIN_GATHER3ALTDIV16SF:
	case IX86_BUILTIN_GATHER3ALTDIV16SI:
	  half = gen_reg_rtx (mode0);
	  if (mode0 == V8SFmode)
	    gen = gen_vec_extract_lo_v16sf;
	  else
	    gen = gen_vec_extract_lo_v16si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (GET_MODE (op3) != VOIDmode)
	    {
	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	      emit_insn (gen (half, op3));
	      op3 = half;
	    }
	  break;
	case IX86_BUILTIN_GATHERALTDIV8SF:
	case IX86_BUILTIN_GATHERALTDIV8SI:
	  half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (GET_MODE (op3) != VOIDmode)
	    {
	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	      emit_insn (gen (half, op3));
	      op3 = half;
	    }
	  break;
	default:
	  break;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op1 = ix86_zero_extend_to_Pmode (op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
	{
	  if (!insn_data[icode].operand[4].predicate (op3, mode3))
	    op3 = copy_to_mode_reg (mode3, op3);
	}
      else
	{
	  op3 = copy_to_reg (op3);
	  op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
	}
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("the last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == INTEGER_CST)
	    {
	      if (integer_all_onesp (arg3))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      unsigned int negative = 0;
	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
		{
		  tree cst = VECTOR_CST_ELT (arg3, i);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME
		   && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      switch (fcode)
	{
	case IX86_BUILTIN_GATHER3DIV16SF:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V8SFmode);
	  emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHER3DIV16SI:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V8SImode);
	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHERDIV8SF:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V4SFmode);
	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHERDIV8SI:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V4SImode);
	  emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	  break;
	default:
	  target = subtarget;
	  break;
	}
      return target;
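    /* Usage sketch (illustrative): an unmasked AVX2 gather such as

	 #include <immintrin.h>
	 __m256d g (const double *p, __m128i idx)
	 {
	   return _mm256_i32gather_pd (p, idx, 8);
	 }

       arrives here with a mask built by comparing a register with
       itself, which is exactly the SSA_NAME shape recognized above, so
       op0 becomes pc_rtx and the gather is free to clobber its whole
       destination.  */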
    scatter_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
	{
	  if (!insn_data[icode].operand[1].predicate (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	}
      else
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
	}

      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);

      if (!insn_data[icode].operand[3].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);

      if (!insn_data[icode].operand[4].predicate (op4, mode4))
	{
	  error ("the last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;

      emit_insn (pat);
      return 0;
    vec_prefetch_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (GET_MODE (op0) == mode0
	  || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
	{
	  if (!insn_data[icode].operand[0].predicate (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	}
      else if (op0 != constm1_rtx)
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
	}

      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));

      if (!insn_data[icode].operand[2].predicate (op2, Pmode))
	op2 = copy_to_mode_reg (Pmode, op2);

      if (!insn_data[icode].operand[3].predicate (op3, mode3))
	{
	  error ("the fourth argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      if (!insn_data[icode].operand[4].predicate (op4, mode4))
	{
	  error ("incorrect hint operand");
	  return const0_rtx;
	}

      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;

      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE)
	    /* Emit a normal call if SSE isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
    if (d->code == fcode)
      return ix86_expand_round_builtin (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
/* This returns the target-specific builtin with code CODE if
   current_function_decl has visibility on this builtin, which is checked
   using isa flags.  Returns NULL_TREE otherwise.  */

static tree
ix86_get_builtin (enum ix86_builtins code)
{
  struct cl_target_option *opts;
  tree target_tree = NULL_TREE;

  /* Determine the isa flags of current_function_decl.  */

  if (current_function_decl)
    target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);

  if (target_tree == NULL)
    target_tree = target_option_default_node;

  opts = TREE_TARGET_OPTION (target_tree);

  if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
    return ix86_builtin_decl (code, true);
  else
    return NULL_TREE;
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
	}
      break;

    case BUILT_IN_EXP2F:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
	}
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
	  else if (out_n == 16 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
	}
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
	  else if (out_n == 16 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
	}
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
	  else if (out_n == 16 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
	  if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
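/* Usage sketch (illustrative): this hook is how a loop like

     void vsqrt (double *a, const double *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = __builtin_sqrt (b[i]);
     }

   vectorizes to sqrtpd: for a V2DF output the vectorizer asks for
   BUILT_IN_SQRT and receives the IX86_BUILTIN_SQRTPD decl (or the
   256/512-bit variants for wider vector factors).  */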
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
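/* Naming sketch (worked example): for BUILT_IN_SINF with n == 4, bname
   is "__builtin_sinf", so bname+10 is "sinf"; "vmls%s" produces
   "vmlssinf", the trailing character is overwritten with '4' giving
   "vmlssin4", and clearing bit 0x20 of name[4] uppercases it to the SVML
   entry point "vmlsSin4".  */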
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64-bit only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    case V8DFmode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
      else
	return NULL_TREE;
      break;
    case V8DImode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
      else
	return NULL_TREE;
      break;
    case V16SFmode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
      else
	return NULL_TREE;
      break;
    case V16SImode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_get_builtin (code);
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_get_builtin (IX86_BUILTIN_RSQRTF);

      default:
	return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
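/* Worked example (illustrative): for V4DFmode and the parallel
   (1 0 3 2), the loops above accumulate mask = (1 << 0) | ((3 - 2) << 2)
   = 5, so the function returns 6: the vpermilpd immediate 0x5, plus the
   one added to distinguish success from the 0 used for "no match".  */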
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
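/* Worked example (illustrative): for V4DFmode and the parallel
   (2 3 4 5), both halves step by one, the half selectors are 2/2 = 1 and
   4/2 = 2, so mask = 1 | (2 << 4) = 0x21 (the vperm2f128 immediate) and
   the function returns 0x22.  */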
/* Return a register priority for hard reg REGNO.  */

static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always want a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == 0)
    return 4;
  return 3;
}
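/* In short (recap of the values above): %eax gets priority 4, most
   registers get the default 3, the REX registers r8-r15 and
   xmm8-xmm15 get 2, %ebp gets 1, and %r12/%r13 get 0, so the
   allocator reaches for the cheap-to-encode registers first.  */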
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
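/* Example of the narrowing above (illustrative): reloading the
   constant 1.0 into FLOAT_SSE_REGS with 80387 math enabled yields
   FLOAT_REGS, so the value lands in an x87 register, while any
   nonzero constant headed for an MMX, SSE or MASK class yields
   NO_REGS and is therefore loaded from the constant pool instead.  */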
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */

static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && (MAYBE_MASK_CLASS_P (rclass)
	  || (!TARGET_64BIT && !in_p
	      && INTEGER_CLASS_P (rclass)
	      && MAYBE_NON_Q_CLASS_P (rclass))))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case NON_Q_REGS:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in single alternative in the machine
   description.  Ensure that this constraint holds to avoid unexpected
   surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
      && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
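/* Example (illustrative): on x86-64, TImode is larger than
   UNITS_PER_WORD, so inline_secondary_memory_needed (SSE_REGS,
   GENERAL_REGS, TImode, 1) returns true and a TImode value moving
   between an xmm register and a general register pair is bounced
   through a stack slot.  */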
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
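/* Worked examples (illustrative): DImode in an integer class needs
   (8 + 4 - 1) / 4 = 2 registers on a 32-bit target and 1 on a 64-bit
   target, whereas XFmode is special-cased above to 3 and 2; in the
   FP and SSE classes a single register always suffices unless the
   mode is complex.  */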
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
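/* Example (illustrative): for a pair of classes that needs secondary
   memory the result is 1 plus the MAX of the store and load costs for
   both classes, plus 20 when CLASS1 spans more hard registers than
   CLASS2 (several narrow stores feeding one wider load can stall),
   plus 20 again for FP/MMX pairs whose registers physically
   overlap.  */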
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    return (VALID_MASK_REG_MODE (mode)
	    || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && (mode == XImode
	      || VALID_AVX512F_REG_MODE (mode)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (mode == OImode
	      || mode == TImode
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
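/* Examples (illustrative): V4SFmode and V2DFmode tie, since both are
   16-byte SSE-only modes; SFmode ties with both DFmode and XFmode;
   and QImode ties with SImode only when the target is 64-bit or has
   no partial register stalls, per ix86_tieable_integer_mode_p.  */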
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
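/* Worked example (illustrative): a DImode copy on a 32-bit target
   keeps units = UNITS_PER_WORD = 4, so the result is
   COSTS_N_INSNS ((8 + 3) / 4) = COSTS_N_INSNS (2), i.e. two SImode
   register moves.  */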
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
		bool speed)
{
  rtx mask;
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && !(TARGET_64BIT
		    && (GET_CODE (x) == LABEL_REF
			|| (GET_CODE (x) == SYMBOL_REF
			    && SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  return true;
	}
      switch (standard_80387_constant_p (x))
	{
	case 1: /* 0.0 */
	  *total = 1;
	  return true;
	default: /* Other constants */
	case 2: /* 1.0 */
	  *total = 2;
	  return true;
	case 0:
	  break;
	}
      if (SSE_FLOAT_MODE_P (mode))
	{
    case CONST_VECTOR:
	  switch (standard_sse_constant_p (x))
	    {
	    case 0:
	      break;
	    case 1: /* 0: xor eliminates false dependency */
	      *total = 0;
	      return true;
	    default: /* -1: cmp contains false dependency */
	      *total = 1;
	      return true;
	    }
	}
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  /* V*QImode is emulated with 1-11 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int count = 11;
	      if (TARGET_XOP && mode == V16QImode)
		{
		  /* For XOP we use vpshab, which requires a broadcast of the
		     value to the variable shift insn.  For constants this
		     means a V16Q const in mem; even when we can perform the
		     shift with one insn set the cost to prefer paddb.  */
		  if (CONSTANT_P (XEXP (x, 1)))
		    {
		      *total = (cost->fabs
				+ rtx_cost (XEXP (x, 0), code, 0, speed)
				+ (speed ? 2 : COSTS_N_BYTES (16)));
		      return true;
		    }
		  count = 3;
		}
	      else if (TARGET_SSSE3)
		count = 7;
	      *total = cost->fabs * count;
	    }
	  else
	    *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
	    {
	      /* Return the cost after shift-and truncation.  */
	      *total = cost->shift_var;
	      return true;
	    }
	  else
	    *total = cost->shift_var;
	}
      break;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* V*QImode is emulated with 7-13 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int extra = 11;
	      if (TARGET_XOP && mode == V16QImode)
		extra = 5;
	      else if (TARGET_SSSE3)
		extra = 6;
	      *total = cost->fmul * 2 + cost->fabs * extra;
	    }
	  /* V*DImode is emulated with 5-8 insns.  */
	  else if (mode == V2DImode || mode == V4DImode)
	    {
	      if (TARGET_XOP && mode == V2DImode)
		*total = cost->fmul * 2 + cost->fabs * 3;
	      else
		*total = cost->fmul * 3 + cost->fabs * 5;
	    }
	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
	     insns, including two PMULUDQ.  */
	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	    *total = cost->fmul * 2 + cost->fabs * 5;
	  else
	    *total = cost->fmul;
	  return true;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      break;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      break;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->fabs;
      return true;

    case VEC_MERGE:
      mask = XEXP (x, 2);
      /* This is a masked instruction, so assume the same cost
	 as the non-masked variant.  */
      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
	*total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
      else
	*total = cost->fabs;
      return true;

    default:
      break;
    }

  return false;
}
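/* Illustrative use (hypothetical, not in the original source): the
   middle end reaches this hook through rtx_cost, e.g.

     rtx sum = gen_rtx_PLUS (SImode, some_reg, GEN_INT (4));
     int c = rtx_cost (sum, SET, 1, true);

   where `some_reg' stands for any SImode REG.  The PLUS case above
   prices an address-shaped (plus (mult reg 2/4/8) const) as a single
   lea rather than as a shift plus an add.  */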
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Extended REX SSE registers.  */
  for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Mask register.  */
  for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args,
					 int, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name, tree, int,
			   bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name, tree, int,
			      bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
			      bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount)
    fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop.  */
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	x86_print_call_or_nop (file, mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name);
    }

  if (flag_record_mcount)
    {
      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting NOP just before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx_insn *insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
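/* Worked example (illustrative): a function whose only path contains
   two real instructions gets insn_count = 2, so the loop above emits
   gen_nops (GEN_INT (4)) before the epilogue; since two NOPs count as
   one instruction, those 2 * (4 - 2) = 4 NOPs top the path up to the
   required four instructions.  */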
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next)
	    && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
		|| NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Implement machine-specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid four jumps in a single 16-byte window.  */

static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
        ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();

#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
        ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */

bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
        && !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;

  return false;
}
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */

static int
extended_reg_mentioned_1 (rtx *p, void *)
{
  unsigned int regno;

  if (!REG_P (*p))
    return 0;

  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */

bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
                       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */

bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));
      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
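
/* A minimal standalone sketch of the sign-swapping rule above, for
   illustration only; the helper below is invented and kept out of the
   build.  Signed 8-bit immediates cover [-128, 127], so -128 must not
   be negated (128 would need a wider immediate), while +128 is better
   expressed by subtracting -128.  */
#if 0
#include <stdbool.h>

static bool
maybe_negate_imm (long val, long *out)
{
  if ((val < 0 && val != -128) || val == 128)
    {
      *out = -val;      /* e.g. addl $-4 becomes subl $4 */
      return true;
    }
  return false;
}
#endif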
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
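
/* A scalar model of x86_emit_floatuns for DImode inputs, illustration
   only and kept out of the build: when the value does not fit in a
   signed conversion, halve it with the low bit folded back in (so the
   final rounding is unaffected), convert, then double.  */
#if 0
#include <stdint.h>

static double
floatuns_model (uint64_t x)
{
  if ((int64_t) x >= 0)
    return (double) (int64_t) x;       /* the non-negative fast path */

  uint64_t i0 = (x >> 1) | (x & 1);    /* LSHIFTRT, then IOR with (x & 1) */
  double f0 = (double) (int64_t) i0;   /* expand_float on the halved value */
  return f0 + f0;                      /* out = f0 + f0 */
}
#endif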
/* AVX512F does support 64-byte integer vector operations,
   thus the longest vector we are faced with is V64QImode.  */
#define MAX_VECT_LEN	64

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
   fill target with val via vec_duplicate.  */

static bool
ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
{
  bool ok;
  rtx_insn *insn;
  rtx dup;

  /* First attempt to recognize VAL as-is.  */
  dup = gen_rtx_VEC_DUPLICATE (mode, val);
  insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
  if (recog_memoized (insn) < 0)
    {
      rtx_insn *seq;

      /* If that fails, force VAL into a register.  */
      start_sequence ();
      XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
      seq = get_insns ();
      end_sequence ();
      if (seq)
        emit_insn_before (seq, insn);

      ok = recog_memoized (insn) >= 0;
      gcc_assert (ok);
    }
  return true;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V16SFmode:
    case V16SImode:
    case V8DFmode:
    case V8DImode:
      return ix86_vector_duplicate_value (mode, target, val);

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          rtx x;

          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      goto widen;

    case V8QImode:
      if (!mmx_ok)
        return false;
      goto widen;

    case V8HImode:
    case V16QImode:
      if (TARGET_SSE2)
        {
          struct expand_vec_perm_d dperm;
          rtx tmp1, tmp2;

          memset (&dperm, 0, sizeof (dperm));
          dperm.target = target;
          dperm.vmode = mode;
          dperm.nelt = GET_MODE_NUNITS (mode);
          dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
          dperm.one_operand_p = true;

          /* Extend to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));

          /* Insert the SImode value as low element of a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
          emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));

          ok = (expand_vec_perm_1 (&dperm)
                || expand_vec_perm_broadcast_1 (&dperm));
          gcc_assert (ok);
          return ok;
        }
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
        enum machine_mode smode, wsmode, wvmode;
        rtx x;

        smode = GET_MODE_INNER (mode);
        wvmode = get_mode_wider_vector (mode);
        wsmode = GET_MODE_INNER (wvmode);

        val = convert_modes (wsmode, smode, val, true);
        x = expand_simple_binop (wsmode, ASHIFT, val,
                                 GEN_INT (GET_MODE_BITSIZE (smode)),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
        val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

        x = gen_reg_rtx (wvmode);
        ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
        gcc_assert (ok);
        emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
        return ok;
      }

    case V16HImode:
    case V32QImode:
      {
        enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
        rtx x = gen_reg_rtx (hvmode);

        ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
        gcc_assert (ok);

        x = gen_rtx_VEC_CONCAT (mode, x, x);
        emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
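
/* A scalar model of the "widen" step above, illustration only and kept
   out of the build: one widening step pairs the value with a shifted
   copy of itself (val | (val << bits)); the recursion then broadcasts
   the doubled-width value the same way.  */
#if 0
#include <stdint.h>

static uint32_t
widen_broadcast_model (uint16_t v)
{
  return (uint32_t) v | ((uint32_t) v << 16);
}
#endif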
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
         element is zero and inter-unit moves are OK, we use movq
         instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
                        && !(TARGET_INTER_UNIT_MOVES_TO_VEC
                             && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            const1_rtx,
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
                                          const1_rtx,
                                          GEN_INT (one_var == 1 ? 0 : 1),
                                          GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                          GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
        return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
        break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
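
/* A scalar model of the QImode pairing above, illustration only and
   kept out of the build: the variable byte and its constant neighbour
   are packed into one HImode value, which is then inserted with the
   HImode vector-set path.  */
#if 0
#include <stdint.h>

static uint16_t
combine_qi_pair_model (uint8_t var, uint8_t neighbour, int one_var)
{
  /* An odd ONE_VAR means the variable byte occupies the high half.  */
  if (one_var & 1)
    return (uint16_t) ((var << 8) | neighbour);
  return (uint16_t) ((neighbour << 8) | var);
}
#endif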
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenation to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
                                rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
  rtx first[16], second[8], third[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      /* Choose the half-width mode CMODE from MODE.  */
      switch (mode)
        {
        /* ... CMODE selection for each supported vector mode ...  */
        default:
          gcc_unreachable ();
        }

      if (!register_operand (ops[1], cmode))
        ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
        ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, ops[0],
                                                  ops[1])));
      break;

    case 4:
    case 8:
    case 16:
      /* Choose CMODE, HMODE and (for n == 16) GMODE from MODE.  */
      switch (mode)
        {
        /* ... mode selection for each supported vector mode ...  */
        default:
          gcc_unreachable ();
        }

      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
        {
          first[j] = gen_reg_rtx (cmode);
          v = gen_rtvec (2, ops[i - 1], ops[i]);
          ix86_expand_vector_init (false, first[j],
                                   gen_rtx_PARALLEL (cmode, v));
        }

      n >>= 1;
      if (n > 4)
        {
          gcc_assert (hmode != VOIDmode);
          gcc_assert (gmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }

          n >>= 1;
          for (i = j = 0; i < n; i += 2, j++)
            {
              third[j] = gen_reg_rtx (gmode);
              ix86_expand_vector_init_concat (gmode, third[j],
                                              &second[i], 2);
            }

          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, third, n);
        }
      else if (n > 2)
        {
          gcc_assert (hmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }

          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, second, n);
        }
      else
        ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
                                    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
                               gen_rtx_VEC_DUPLICATE (V4SImode,
                                                      op0),
                               CONST0_RTX (V4SImode),
                               const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
                                force_reg (inner_mode,
                                           ops[i + i + 1]),
                                const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
        {
          op0 = gen_reg_rtx (second_imode);
          emit_insn (gen_interleave_second_low (op0, ops[i],
                                                ops[i + 1]));

          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
             vector.  */
          ops[j] = gen_reg_rtx (third_imode);
          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
        }
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
                                            ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector in the original
         mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  rtx ops[64], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V16SImode:
    case V16SFmode:
    case V8DFmode:
    case V8DImode:
    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

    half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
                                          n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
                                          &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
        break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
        break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
        break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  {
    int i, j, n_elts, n_words, n_elt_per_word;
    enum machine_mode inner_mode;
    rtx words[4], shift;

    inner_mode = GET_MODE_INNER (mode);
    n_elts = GET_MODE_NUNITS (mode);
    n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
    n_elt_per_word = n_elts / n_words;
    shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

    for (i = 0; i < n_words; ++i)
      {
        rtx word = NULL_RTX;

        for (j = 0; j < n_elt_per_word; ++j)
          {
            rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
            elt = convert_modes (word_mode, inner_mode, elt, true);

            if (j == 0)
              word = elt;
            else
              {
                word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                            word, 1, OPTAB_LIB_WIDEN);
                word = expand_simple_binop (word_mode, IOR, word, elt,
                                            word, 1, OPTAB_LIB_WIDEN);
              }
          }

        words[i] = word;
      }

    if (n_words == 1)
      emit_move_insn (target, gen_lowpart (mode, words[0]));
    else if (n_words == 2)
      {
        rtx tmp = gen_reg_rtx (mode);
        emit_clobber (tmp);
        emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
        emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
        emit_move_insn (target, tmp);
      }
    else if (n_words == 4)
      {
        rtx tmp = gen_reg_rtx (V4SImode);
        gcc_assert (word_mode == SImode);
        vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
        ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
        emit_move_insn (target, gen_lowpart (mode, tmp));
      }
    else
      gcc_unreachable ();
  }
}
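
/* A scalar model of the word-building loop above, illustration only and
   kept out of the build: elements are OR-ed into a word highest index
   first, so the lowest-indexed element ends up in the least significant
   bits, matching little-endian element layout.  */
#if 0
#include <stdint.h>

static uint32_t
pack_word_model (const uint8_t *elts, int n_elt_per_word)
{
  uint32_t word = 0;
  for (int j = 0; j < n_elt_per_word; ++j)
    word = (word << 8) | elts[n_elt_per_word - 1 - j];
  return word;
}
#endif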
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
                                           one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
        { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
        { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
        { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
        { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
        { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
        { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
        { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
        { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
        { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
        { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
        { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
        { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
        break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const1_rtx, const0_rtx,
                                          GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          rtx t = gen_reg_rtx (V4SFmode);
          ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
          emit_move_insn (target, gen_lowpart (mode, t));
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

    half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
                                          GEN_INT (elt), GEN_INT (elt),
                                          GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SFmode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DFmode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32QImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V16QImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V16HImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V8HImode);
          if (elt < 8)
            emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 7);
          return;
        }
      break;

    case V8SImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SImode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DImode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32HImode:
      if (TARGET_AVX512BW)
        {
          tmp = gen_reg_rtx (V16HImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V64QImode:
      if (TARGET_AVX512BW)
        {
          tmp = gen_reg_rtx (V32QImode);
          if (elt < 32)
            emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 31);
          return;
        }
      break;

    case V16SFmode:
      tmp = gen_reg_rtx (V8SFmode);
      if (elt < 8)
        emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
      else
        emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
      ix86_expand_vector_extract (false, target, tmp, elt & 7);
      return;

    case V8DFmode:
      tmp = gen_reg_rtx (V4DFmode);
      if (elt < 4)
        emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
      else
        emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
      ix86_expand_vector_extract (false, target, tmp, elt & 3);
      return;

    case V16SImode:
      tmp = gen_reg_rtx (V8SImode);
      if (elt < 8)
        emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
      else
        emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
      ix86_expand_vector_extract (false, target, tmp, elt & 7);
      return;

    case V8DImode:
      tmp = gen_reg_rtx (V4DImode);
      if (elt < 4)
        emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
      else
        emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
      ix86_expand_vector_extract (false, target, tmp, elt & 3);
      return;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem, d = dest;

  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
        tem = gen_sse_movhlps (dest, src, src);
      else
        tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
                                   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      d = gen_reg_rtx (V1TImode);
      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
                                GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufps256 (dest, src, src,
                                 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
        {
          if (GET_MODE (dest) != V4DImode)
            d = gen_reg_rtx (V4DImode);
          tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
                                   gen_lowpart (V4DImode, src),
                                   const1_rtx);
        }
      else
        {
          d = gen_reg_rtx (V2TImode);
          tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
                                    GEN_INT (i / 2));
        }
      break;
    case V16SImode:
    case V16SFmode:
    case V8DImode:
    case V8DFmode:
      if (i > 128)
        tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
                                        gen_lowpart (V16SImode, src),
                                        gen_lowpart (V16SImode, src),
                                        GEN_INT (0x4 + (i == 512 ? 4 : 0)),
                                        GEN_INT (0x5 + (i == 512 ? 4 : 0)),
                                        GEN_INT (0x6 + (i == 512 ? 4 : 0)),
                                        GEN_INT (0x7 + (i == 512 ? 4 : 0)),
                                        GEN_INT (0xC), GEN_INT (0xD),
                                        GEN_INT (0xE), GEN_INT (0xF),
                                        GEN_INT (0x10), GEN_INT (0x11),
                                        GEN_INT (0x12), GEN_INT (0x13),
                                        GEN_INT (0x14), GEN_INT (0x15),
                                        GEN_INT (0x16), GEN_INT (0x17));
      else
        tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
                                    gen_lowpart (V16SImode, src),
                                    GEN_INT (i == 128 ? 0x2 : 0x1),
                                    GEN_INT (i == 128 ? 0x3 : 0x0),
                                    GEN_INT (i == 128 ? 0x0 : 0x3),
                                    GEN_INT (i == 128 ? 0x1 : 0x2),
                                    GEN_INT (i == 128 ? 0x6 : 0x5),
                                    GEN_INT (i == 128 ? 0x7 : 0x4),
                                    GEN_INT (i == 128 ? 0x4 : 0x7),
                                    GEN_INT (i == 128 ? 0x5 : 0x6),
                                    GEN_INT (i == 128 ? 0xA : 0x9),
                                    GEN_INT (i == 128 ? 0xB : 0x8),
                                    GEN_INT (i == 128 ? 0x8 : 0xB),
                                    GEN_INT (i == 128 ? 0x9 : 0xA),
                                    GEN_INT (i == 128 ? 0xE : 0xD),
                                    GEN_INT (i == 128 ? 0xF : 0xC),
                                    GEN_INT (i == 128 ? 0xC : 0xF),
                                    GEN_INT (i == 128 ? 0xD : 0xE));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
  if (d != dest)
    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
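
/* A scalar model of the reduction loop above, illustration only and
   kept out of the build: each pass moves the upper half onto the lower
   half (emit_reduc_half) and combines lane-wise with FN, halving the
   live width until one element remains.  */
#if 0
static int
reduce_model (int *lane, int nelts, int (*fn) (int, int))
{
  for (int n = nelts; n > 1; n /= 2)
    for (int k = 0; k < n / 2; ++k)
      lane[k] = fn (lane[k], lane[k + n / 2]);
  return lane[0];
}
#endif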
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Implement target hook libgcc_floating_mode_supported_p.  */
static bool
ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
      return true;

    case TFmode:
#ifdef IX86_NO_LIBGCC_TFMODE
      return false;
#elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
      return TARGET_LONG_DOUBLE_128;
#else
      return true;
#endif

    default:
      return false;
    }
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree, tree, tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

static void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx_code_label *label1 = gen_label_rtx ();
  rtx_code_label *label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1),
                                  label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
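
/* A scalar model of the branch above, illustration only and kept out of
   the build.  The threshold 0.29289... is 1 - sqrt(2)/2: below it the
   fyl2xp1 path evaluates log(1+x) without forming 1+x (keeping precision
   near zero); above it 1+x is formed explicitly and fyl2x is used.  */
#if 0
#include <math.h>

static double
log1p_model (double x)
{
  const double thresh = 0.29289321881345247561810596348408353;

  if (fabs (x) < thresh)
    return log1p (x);      /* models the fyl2xp1 path */
  return log (1.0 + x);    /* models the fld1/fadd/fyl2x path */
}
#endif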
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx_code_label *jump_label = gen_label_rtx ();
  rtx_insn *insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
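
/* A scalar model of the sequence above, illustration only and kept out
   of the build: round(a) = sgn(a) * floor(fabs(a) + 0.5), with the sign
   taken from fxam rather than from a comparison.  */
#if 0
#include <math.h>

static double
round_model (double a)
{
  double r = floor (fabs (a) + 0.5);  /* e2 = fabs(a) + 0.5; res = floor(e2) */
  return signbit (a) ? -r : r;        /* negate when the sign bit is set */
}
#endif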
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  if (mode == V16SFmode || mode == V8DFmode)
    emit_insn (gen_rtx_SET (VOIDmode, x0,
                            gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                            UNSPEC_RCP14)));
  else
    emit_insn (gen_rtx_SET (VOIDmode, x0,
                            gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                            UNSPEC_RCP)));

  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
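
/* A scalar model of the division refinement above, illustration only
   and kept out of the build: one Newton-Raphson step sharpens the
   hardware reciprocal estimate before the final multiply.  */
#if 0
static float
swdiv_model (float a, float b, float x0 /* ~ 1/b, e.g. from rcpss */)
{
  float e0 = x0 * b;
  e0 = x0 * e0;          /* e0 = b * x0 * x0 */
  float e1 = x0 + x0;    /* e1 = 2 * x0 */
  float x1 = e1 - e0;    /* refined reciprocal of b */
  return a * x1;
}
#endif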
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;
  int unspec;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, SIGNED);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
  unspec = UNSPEC_RSQRT;

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
      /* There is no 512-bit rsqrt.  There is however rsqrt14.  */
      if (GET_MODE_SIZE (mode) == 64)
        unspec = UNSPEC_RSQRT14;
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          unspec)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));

      /* Handle masked compare.  */
      if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
        {
          mask = gen_reg_rtx (HImode);
          /* Imm value 0x4 corresponds to not-equal comparison.  */
          emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
          emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
        }
      else
        {
          emit_insn (gen_rtx_SET (VOIDmode, mask,
                                  gen_rtx_NE (mode, zero, a)));

          emit_insn (gen_rtx_SET (VOIDmode, x0,
                                  gen_rtx_AND (mode, x0, mask)));
        }
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
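
/* A scalar model of the square-root refinement above, illustration only
   and kept out of the build, following the comment in the code:
     sqrt(a)  = -0.5 * a  * x0 * (a * x0 * x0 - 3.0)
     rsqrt(a) = -0.5 * x0 * (a * x0 * x0 - 3.0)
   where x0 is the hardware rsqrt estimate.  */
#if 0
static float
swsqrt_model (float a, float x0 /* ~ 1/sqrt(a), e.g. from rsqrtss */,
              int recip)
{
  float e0 = x0 * a;     /* e0 = a * x0 */
  float e1 = e0 * x0;    /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;
  float e3 = -0.5f * (recip ? x0 : e0);
  return e2 * e3;
}
#endif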
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx_code_label *
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx_code_label *label;
  rtx tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**N where N is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long) tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
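
/* A scalar model of the sequence above, illustration only and kept out
   of the build: adding copysign(nextafter(0.5, 0.0), x) and truncating
   rounds to nearest; nextafter keeps values just below 0.5 (e.g.
   0.49999999999999994) from being rounded up to 1.  */
#if 0
#include <math.h>

static long
lround_model (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);   /* truncation after the biased add */
}
#endif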
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, tmp;
  rtx_code_label *label;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
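
/* A scalar model of the floor path above, illustration only and kept
   out of the build: truncate toward zero, then subtract one when the
   truncation rounded up (negative non-integers).  The ceil path adds
   one when the truncation rounded down instead.  */
#if 0
static long
lfloor_model (double x)
{
  long xi = (long) x;      /* ireg = (long) op1, truncating */
  if ((double) xi > x)     /* freg > op1: truncation went up */
    xi -= 1;
  return xi;
}
#endif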
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, TWO52, mask;
  rtx_code_label *label;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
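
/* A scalar model of the TWO52 trick above, illustration only and kept
   out of the build (it relies on default rounding; do not compile with
   -ffast-math): once |x| >= 2^52 every double is already integral, and
   below that, adding and subtracting 2^52 leaves the nearest integer.  */
#if 0
#include <math.h>

static double
rint_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x);
  if (!(xa < two52))            /* mirrors !isless (xa, TWO52) */
    return x;
  xa = (xa + two52) - two52;    /* rounds xa to nearest integer */
  return copysign (xa, x);      /* restore the sign, including -0.0 */
}
#endif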
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
	Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
	Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  x2 = copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
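
/* Note: masking the 1.0 (or -1.0) correction through the UNGT compare
   result and subtracting unconditionally keeps this sequence
   branch-free; subtracting a possibly-zero value rather than
   conditionally skipping the operation is also what preserves -0.0,
   as the comment above points out.  */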
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
	Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
	Using the absolute value and copying back sign makes
	-0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
	Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
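
/* Note: dxa = xa2 - xa is the signed error the TWO52 trick introduced.
   Under round-to-nearest it lies in [-0.5, 0.5]; the dxa <= -0.5 masked
   add turns the exact-half round-to-even result into the
   round-half-away-from-zero semantics round () requires (xa is
   nonnegative here), while the dxa > 0.5 test additionally guards
   against directed rounding modes where the error can approach 1.  */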
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, one, res, smask, tmp;
  rtx_code_label *label;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
	Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
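
/* Note: unlike the round expansion, the compensation here is
   one-sided: xa is nonnegative after the fabs, so the TWO52 rounding
   can only overshoot, and a single masked subtract of 1.0 yields
   floor (xa), which equals trunc for nonnegative values; the final
   copysign restores the sign of the original operand.  */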
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, xi, half, mask;
  rtx_code_label *label;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
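
/* Note: the ROUND_TRUNC immediate selects truncation in the SSE4.1
   rounds[sd] instruction, so round (a) is computed here as
   trunc (a + copysign (nextafter (0.5, 0.0), a)), matching the
   predecessor-of-0.5 compensation used by the non-SSE4.1 expansions
   above.  */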
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype,
				 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return ix86_cost->scalar_stmt_cost;

      case scalar_load:
	return ix86_cost->scalar_load_cost;

      case scalar_store:
	return ix86_cost->scalar_store_cost;

      case vector_stmt:
	return ix86_cost->vec_stmt_cost;

      case vector_load:
	return ix86_cost->vec_align_load_cost;

      case vector_store:
	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_cost->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx_insn *vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
							const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
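
/* Note: recog_memoized on the scratch insn is used purely as a query:
   the pattern is recognized against the active ISA, a copy is emitted
   only on success and only when not just testing, and the scratch is
   then reset (destination, vconcat operand, INSN_CODE) so the cached
   rtx can be reused by the next call.  */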
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
			bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    if (target != d->target)
	      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_reg_rtx (vmode);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_reg_rtx (vmode);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
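
/* Note: throughout the function MASK accumulates one bit (or a 2-bit
   field, for the modes that are retried through a wider subreg) per
   destination element, set when that element comes from the second
   operand; the final VEC_MERGE uses it directly as the blend
   immediate.  */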
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead of the specified mode.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
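
/* Note: the helper above checks that D moves whole VMODE-sized chunks:
   each chunk must start on a chunk boundary (low bits of the index
   clear) and its elements must be consecutive, which is what lets a
   wider-element shuffle such as vpermq implement a narrower-element
   permutation.  */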
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = d->target;
	      if (d->vmode != V4DImode)
		target = gen_reg_rtx (V4DImode);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT ((d->perm[0] / (nelt / 2))
			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      if (target != d->target)
		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  target = gen_reg_rtx (V4DImode);
		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
				      perm, 4, false))
		    {
		      emit_move_insn (d->target,
				      gen_lowpart (d->vmode, target));
		      return true;
		    }
		  return false;
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)
	    vmode = V8SFmode;

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = d->target;
  if (d->vmode != vmode)
    target = gen_reg_rtx (vmode);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default:
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  /* Try the AVX512F vpermi2 instructions.  */
  rtx vec[64];
  enum machine_mode mode = d->vmode;
  if (mode == V8DFmode)
    mode = V8DImode;
  else if (mode == V16SFmode)
    mode = V16SImode;
  for (i = 0; i < nelt; ++i)
    vec[i] = GEN_INT (d->perm[i]);
  rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
  if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift, target;
  struct expand_vec_perm_d dcopy;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  dcopy = *d;
  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  target = gen_reg_rtx (TImode);
  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
  dcopy.one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = dcopy.perm[i] - min;
      if (e != i)
	in_order = false;
      dcopy.perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    {
      emit_move_insn (d->target, dcopy.op0);
      return true;
    }

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok);

  return ok;
}
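
/* Note: palignr shifts the 256-bit concatenation of the two operands
   right by min elements, so after the shift every requested index
   falls inside a single vector; the leftover permutation is therefore
   one-operand, which pshufb can always finish when SSSE3 is
   available.  */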
/* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
   the permutation using the SSE4_1 pblendv instruction.  Potentially
   reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */

static bool
expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
{
  unsigned i, which, nelt = d->nelt;
  struct expand_vec_perm_d dcopy, dcopy1;
  enum machine_mode vmode = d->vmode;
  bool ok;

  /* Use the same checks as in expand_vec_perm_blend, but skipping
     AVX and AVX2 as they require more than 2 instructions.  */
  if (d->one_operand_p)
    return false;
  if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* Figure out where permutation elements stay not in their
     respective lanes.  */
  for (i = 0, which = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e != i)
	which |= (e < nelt ? 1 : 2);
    }
  /* We can pblend the part where elements stay not in their
     respective lanes only when these elements are all in one
     half of a permutation.
     {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
     lanes, but both 8 and 9 >= 8
     {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
     respective lanes and 8 >= 8, but 2 not.  */
  if (which != 1 && which != 2)
    return false;

  if (d->testing_p)
    return true;

  /* First we apply one operand permutation to the part where
     elements stay not in their respective lanes.  */
  dcopy = *d;
  if (which == 2)
    dcopy.op0 = dcopy.op1 = d->op1;
  else
    dcopy.op0 = dcopy.op1 = d->op0;
  dcopy.one_operand_p = true;

  for (i = 0; i < nelt; ++i)
    dcopy.perm[i] = d->perm[i] & (nelt - 1);

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok);

  /* Next we put permuted elements into their positions.  */
  dcopy1 = *d;
  if (which == 2)
    dcopy1.op1 = dcopy.target;
  else
    dcopy1.op0 = dcopy.target;

  for (i = 0; i < nelt; ++i)
    dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);

  ok = expand_vec_perm_blend (&dcopy1);
  gcc_assert (ok);

  return true;
}
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx_insn *seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;

      /* For 32-byte modes allow even d->one_operand_p.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->one_operand_p);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->one_operand_p)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->one_operand_p)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dremap.target = gen_reg_rtx (dremap.vmode);
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
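
/* Note: the final shuffle is built first, inside a
   start_sequence/get_insns pair, so the function can cheaply back out
   when the remapped permutation cannot be done in one insn; only then
   is the interleave (dremap) emitted, followed by the buffered final
   sequence.  */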
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx_insn *seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = d->target;
  if (d->vmode != V16QImode)
    op = gen_reg_rtx (V16QImode);
  emit_insn (gen_iorv16qi3 (op, l, h));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_reg_rtx (V4DImode);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
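/* Illustrative sketch (not part of GCC): how the two vpshufb control
   vectors built above are laid out for a V32QImode extract-even.  Any
   control byte with bit 7 set (-128) makes vpshufb write zero, so
   or-ing the two shuffle results merges op0's and op1's contributions.
   A hypothetical standalone model with made-up names, assuming plain C: */
#if 0
static void
build_even_masks (signed char mask0[32], signed char mask1[32])
{
  int i;

  for (i = 0; i < 32; i++)
    {
      mask0[i] = -128;
      mask1[i] = -128;
    }
  /* Quarters 0 and 2 of mask0 select even bytes of op0's two halves;
     quarters 1 and 3 of mask1 select even bytes of op1's two halves.  */
  for (i = 0; i < 8; i++)
    {
      mask0[i] = 2 * i;		/* 0, 2, ..., 0xe  */
      mask0[16 + i] = 2 * i;
      mask1[8 + i] = 2 * i;
      mask1[24 + i] = 2 * i;
    }
}
#endif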
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4, t5;

  switch (d->vmode)
    {
    case V4DFmode:
      if (d->testing_p)
	break;
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	if (d->testing_p)
	  break;
	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  if (d->testing_p)
	    break;
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  (See the sketch after this function.)  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  if (d->testing_p)
	    break;
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  if (d->testing_p)
	    d_copy.target = gen_lowpart (V4DFmode, d->target);
	  else
	    d_copy.target = gen_reg_rtx (V4DFmode);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
	    {
	      if (!d->testing_p)
		emit_move_insn (d->target,
				gen_lowpart (V4DImode, d_copy.target));
	      return true;
	    }
	  return false;
	}

      if (d->testing_p)
	break;

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  if (d->testing_p)
	    d_copy.target = gen_lowpart (V8SFmode, d->target);
	  else
	    d_copy.target = gen_reg_rtx (V8SFmode);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
	    {
	      if (!d->testing_p)
		emit_move_insn (d->target,
				gen_lowpart (V8SImode, d_copy.target));
	      return true;
	    }
	  return false;
	}

      if (d->testing_p)
	break;

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);
      t3 = gen_reg_rtx (V4DImode);
      t4 = gen_reg_rtx (V4DImode);
      t5 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
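/* Illustrative sketch (not part of GCC): the interleave-based even/odd
   extraction used in the V8HImode case above, modeled on scalar arrays.
   Each interleave round halves the stride between the wanted elements;
   after log2(N) rounds the even (or odd) elements of both inputs sit in
   one vector, using 2*log2(N)-1 vector interleaves.  A hypothetical
   model with made-up names for N = 8, assuming plain C: */
#if 0
static void
interleave8 (const int *x, const int *y, int lo[8], int hi[8])
{
  int i;

  for (i = 0; i < 4; i++)
    {
      lo[2 * i] = x[i];
      lo[2 * i + 1] = y[i];
      hi[2 * i] = x[4 + i];
      hi[2 * i + 1] = y[4 + i];
    }
}

static void
extract_even_odd8 (const int a[8], const int b[8], int out[8], int odd)
{
  int l1[8], h1[8], l2[8], h2[8], l3[8], h3[8], i;

  interleave8 (a, b, l1, h1);	/* a0 b0 a1 b1 .. | a4 b4 a5 b5 ..  */
  interleave8 (l1, h1, l2, h2);	/* a0 a4 b0 b4 .. | a2 a6 b2 b6 ..  */
  interleave8 (l2, h2, l3, h3);	/* evens: a0 a2 a4 a6 b0 b2 b4 b6
				   odds:  a1 a3 a5 a7 b1 b3 b5 b7  */
  for (i = 0; i < 8; i++)
    out[i] = odd ? h3[i] : l3[i];
}
#endif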
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0, dest;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      if (d->testing_p)
	return true;
      do
	{
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      dest = gen_reg_rtx (V4SImode);
      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
      gcc_assert (ok);

      emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  if (expand_vec_perm_pblendv (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
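/* Illustrative sketch (not part of GCC): what canonicalize_perm does to
   the selector.  Indices >= nelt name elements of op1; when both operands
   are identical (or only one is referenced) the indices can be folded
   into op0's range with a mask, giving later single-input matchers a
   chance.  A hypothetical model of the selector handling only, with
   made-up names, assuming plain C: */
#if 0
static int
fold_selector (unsigned char perm[], int nelt, int ops_identical)
{
  int i, which = 0;

  for (i = 0; i < nelt; ++i)
    which |= (perm[i] < nelt ? 1 : 2);

  if (which == 3 && !ops_identical)
    return 1;			/* Genuinely needs two operands.  */

  /* Fold every index into op0's range: a one-operand permutation.  */
  for (i = 0; i < nelt; ++i)
    perm[i] &= nelt - 1;
  return 0;
}
#endif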
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (d.vmode == V16SImode || d.vmode == V16SFmode
      || d.vmode == V8DFmode || d.vmode == V8DImode)
    /* All implementable with a single vpermi2 insn.  */
    return true;
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
static void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
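/* Illustrative sketch (not part of GCC): the permutation built just
   above.  For high_p the selector reads from the top halves of both
   inputs; elements of op0 and op1 alternate in the result.  A
   hypothetical scalar model with made-up names, assuming plain C: */
#if 0
static void
model_interleave (const int *op0, const int *op1, int *targ,
		  int nelt, int high_p)
{
  int i, base = high_p ? nelt / 2 : 0;

  for (i = 0; i < nelt / 2; ++i)
    {
      targ[2 * i] = op0[base + i];
      targ[2 * i + 1] = op1[base + i];
    }
}
#endif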
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
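/* Illustrative sketch (not part of GCC): the widen/operate/narrow scheme
   above, modeled on scalars for a byte multiply.  Each byte is placed in
   the low half of a 16-bit word (the high half is a don't-care copy for
   MULT), the operation runs at word width, and the low bytes of the word
   results are gathered back.  A hypothetical model with made-up names,
   assuming plain C: */
#if 0
static void
mul_bytes_via_words (const unsigned char *a, const unsigned char *b,
		     unsigned char *dst, int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      unsigned short wa = a[i], wb = b[i];	/* "unpacked" operands  */
      unsigned short wr = (unsigned short) (wa * wb);
      dst[i] = (unsigned char) wr;		/* keep the low byte  */
    }
}
#endif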
/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
   if op is CONST_VECTOR with all odd elements equal to their
   preceding element.  */

static bool
const_vector_equal_evenodd_p (rtx op)
{
  enum machine_mode mode = GET_MODE (op);
  int i, nunits = GET_MODE_NUNITS (mode);
  if (GET_CODE (op) != CONST_VECTOR
      || nunits != CONST_VECTOR_NUNITS (op))
    return false;
  for (i = 0; i < nunits; i += 2)
    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
      return false;
  return true;
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;
  rtx orig_op1 = op1, orig_op2 = op2;

  if (!nonimmediate_operand (op1, mode))
    op1 = force_reg (mode, op1);
  if (!nonimmediate_operand (op2, mode))
    op2 = force_reg (mode, op2);

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
	 signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      if (!const_vector_equal_evenodd_p (orig_op1))
	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			    x, NULL, 1, OPTAB_DIRECT);
      if (!const_vector_equal_evenodd_p (orig_op2))
	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			    x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V16SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
    }
  else if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
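/* Illustrative sketch (not part of GCC): the identity behind the
   PMULDQ-free fallback above.  Sign-extending a 32-bit value to 64 bits
   makes its high word either 0 or ~0, and those high words only feed the
   upper half of the product.  A hypothetical scalar check with made-up
   names, assuming C99 types: */
#if 0
#include <stdint.h>

static int64_t
smul32_via_umul (int32_t a, int32_t b)
{
  uint64_t ua = (uint32_t) a, ub = (uint32_t) b;
  uint64_t ha = a < 0 ? 0xffffffffu : 0;	/* highpart of sign-ext a  */
  uint64_t hb = b < 0 ? 0xffffffffu : 0;
  uint64_t lo = ua * ub;			/* LO(A) * LO(B)  */
  uint64_t cross = (ha * ub + hb * ua) << 32;	/* shifted highparts  */
  return (int64_t) (lo + cross);		/* exact mod 2^64  */
}
#endif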
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}
      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      t3 = gen_reg_rtx (mode);
      ix86_expand_vec_interleave (t3, t1, t2, high_p);
      emit_move_insn (dest, gen_lowpart (wmode, t3));
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2, res_3, res_4;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  res_3 = gen_reg_rtx (V2DImode);
  res_4 = gen_reg_rtx (V2DImode);
  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
				    GEN_INT (1),
				    GEN_INT (0),
				    GEN_INT (3),
				    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* Multiply lower parts and add all.  */
      t5 = gen_reg_rtx (V2DImode);
      emit_insn (gen_vec_widen_umult_even_v4si (t5,
					gen_lowpart (V4SImode, op1),
					gen_lowpart (V4SImode, op2)));
      op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
	{
	  umul = gen_vec_widen_umult_even_v4si;
	  nmode = V4SImode;
	}
      else if (mode == V4DImode)
	{
	  umul = gen_vec_widen_umult_even_v8si;
	  nmode = V8SImode;
	}
      else if (mode == V8DImode)
	{
	  umul = gen_vec_widen_umult_even_v16si;
	  nmode = V16SImode;
	}
      else
	gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MULT (mode, op1, op2));
}
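/* Illustrative sketch (not part of GCC): the decomposition implemented
   by the non-XOP path above.  With a*b = (hi_a*2^32 + lo_a) *
   (hi_b*2^32 + lo_b), the hi_a*hi_b term vanishes mod 2^64, leaving one
   low product plus two shifted cross products.  A hypothetical scalar
   check with made-up names, assuming C99 types: */
#if 0
#include <stdint.h>

static uint64_t
mul64_via_mul32 (uint64_t a, uint64_t b)
{
  uint64_t lo_a = (uint32_t) a, hi_a = a >> 32;
  uint64_t lo_b = (uint32_t) b, hi_b = b >> 32;
  uint64_t low = lo_a * lo_b;			   /* t1 above  */
  uint64_t cross = (hi_a * lo_b + hi_b * lo_a) << 32; /* t4 above  */
  return low + cross;				   /* exact mod 2^64  */
}
#endif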
/* Calculate integer abs() using only SSE2 instructions.  */

void
ix86_expand_sse2_abs (rtx target, rtx input)
{
  enum machine_mode mode = GET_MODE (target);
  rtx tmp0, tmp1, x;

  switch (mode)
    {
      /* For 32-bit signed integer X, the best way to calculate the absolute
	 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
      case V4SImode:
	tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
				    GEN_INT (GET_MODE_BITSIZE
					     (GET_MODE_INNER (mode)) - 1),
				    NULL, 0, OPTAB_DIRECT);
	tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
				    NULL, 0, OPTAB_DIRECT);
	x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
				 target, 0, OPTAB_DIRECT);
	break;

      /* For 16-bit signed integer X, the best way to calculate the absolute
	 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
      case V8HImode:
	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

	x = expand_simple_binop (mode, SMAX, tmp0, input,
				 target, 0, OPTAB_DIRECT);
	break;

      /* For 8-bit signed integer X, the best way to calculate the absolute
	 value of X is min ((unsigned char) X, (unsigned char) (-X)),
	 as SSE2 provides the PMINUB insn.  */
      case V16QImode:
	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

	x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
				 target, 0, OPTAB_DIRECT);
	break;

      default:
	gcc_unreachable ();
    }

  if (x != target)
    emit_move_insn (target, x);
}
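/* Illustrative sketch (not part of GCC): the three scalar identities the
   expander above picks from, written out directly.  A hypothetical model
   with made-up names, assuming C99 types and two's complement: */
#if 0
#include <stdint.h>

static int32_t
abs_by_shift (int32_t x)	/* V4SImode case  */
{
  int32_t s = x >> 31;		/* 0 or -1  */
  return (s ^ x) - s;
}

static int16_t
abs_by_smax (int16_t x)		/* V8HImode case: PMAXSW  */
{
  int16_t n = (int16_t) -x;
  return x > n ? x : n;
}

static int8_t
abs_by_umin (int8_t x)		/* V16QImode case: PMINUB  */
{
  uint8_t p = (uint8_t) x, n = (uint8_t) -x;
  return (int8_t) (p < n ? p : n);
}
#endif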
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	rtx d = dst;
	if (GET_MODE (dst) != dstmode)
	  d = gen_reg_rtx (dstmode);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
			  GEN_INT (1 << pos)));
	if (d != dst)
	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
	return true;
      }

    default:
      return false;
    }
}
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;

      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;

      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;

      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in the dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path {
  no_path = 0,
  path_single,	/* Single micro op.  */
  path_double,	/* Double micro op.  */
  path_multi,	/* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;		   /* Number of insns in the window.  */
  int num_uops;		   /* Number of uops in the window.  */
  int window_size;	   /* Number of bytes in the window.  */
  int window_num;	   /* Window number, 0 or 1.  */
  int num_imm;		   /* Number of immediates in an insn.  */
  int num_imm_32;	   /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;	   /* Number of 64 bit immediates in an insn.  */
  int imm_size;		   /* Total immediates in the window.  */
  int num_loads;	   /* Total memory loads in the window.  */
  int num_stores;	   /* Total memory stores in the window.  */
  int violation;	   /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx_insn *insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx_insn *insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
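/* Illustrative sketch (not part of GCC): the byte accounting performed
   by get_num_immediates.  Each 32-bit immediate contributes 4 and each
   64-bit immediate 8 to the window's imm_size total, which
   count_num_restricted later compares against the MAX_IMM_SIZE budget.
   A hypothetical helper with a made-up name, assuming plain C: */
#if 0
static int
imm_bytes_in_window (int num_imm32, int num_imm64)
{
  return num_imm32 * 4 + num_imm64 * 8;	/* same formula as above  */
}
#endif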
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx_insn *insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx_insn *insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
				     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand
		     > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2
		     > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx_insn *insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed what is allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx_insn *insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file,
	       "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to STDERR the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx_insn *insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
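
/* Minimal usage sketch (hypothetical call sites; in practice the
   scheduler reaches this function through the target's dispatch_do
   hook rather than calling it directly):

       do_dispatch (NULL, DISPATCH_INIT);              -- once per region
       do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);     -- per scheduled insn
*/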

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx_insn *insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}

/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int, enum machine_mode mode)
{
  int res = 1;

  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      if (TARGET_VECTOR_PARALLEL_EXECUTION)
        return 2;
      else
        return 1;
    }

  /* Scalar part.  */
  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
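
/* For example, a reported width of 2 lets the reassoc pass rewrite
   ((a + b) + c) + d as (a + b) + (c + d), exposing two independent
   additions that an Atom core can issue in the same cycle.  */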

/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return TARGET_AVX512BW ? V64QImode :
        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return TARGET_AVX512BW ? V32HImode :
        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return TARGET_AVX512F ? V16SImode :
        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return TARGET_AVX512F ? V8DImode :
        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX512F)
        return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX512F)
        return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
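
/* For instance, vectorizing SImode arithmetic with AVX2 enabled (and
   TARGET_PREFER_AVX128 clear) yields V8SImode above, i.e. eight ints per
   256-bit vector, while plain SSE2 falls back to V4SImode.  */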

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return TARGET_AVX512F ? 64 | 32 | 16 :
    (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
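
/* The result is a bit mask of vector sizes in bytes: under AVX512F the
   value is 64 | 32 | 16 (0x70), letting the vectorizer try 512-, 256-
   and 128-bit vectors in turn; 0 means only the preferred SIMD mode
   chosen above is tried.  */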

/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
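
/* E.g. with SSE enabled on a tuning that sets
   TARGET_GENERAL_REGS_SSE_SPILL (and MMX disabled), an SImode pseudo of
   an integer class may be spilled to an SSE register rather than to a
   stack slot.  */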

/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                    struct _stmt_vec_info *stmt_info, int misalign,
                    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
    count *= 50;  /* FIXME.  */

  retval = (unsigned) (count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instructions per tick, but has in order SIMD pipeline.  */
  if (TARGET_SILVERMONT || TARGET_INTEL)
    if (stmt_info && stmt_info->stmt)
      {
        tree lhs_op = gimple_get_lhs (stmt_info->stmt);
        if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
          retval = (retval * 17) / 10;
      }

  cost[where] += retval;

  return retval;
}
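
/* Worked example: an integer vector statement in the loop body on
   Silvermont with COUNT == 2 and STMT_COST == 3 gives RETVAL == 6, and
   the integer approximation of the 1.7 factor above turns that into
   (6 * 17) / 10 == 10 before it is accumulated into cost[vect_body].  */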

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
                  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}
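
/* Lifecycle sketch of this cost-model triple (an informal illustration;
   the vectorizer reaches these through the targetm.vectorize hooks
   registered at the end of this file):

       void *data = ix86_init_cost (loop);
       ix86_add_stmt_cost (data, 1, vector_stmt, NULL, 0, vect_body);
       unsigned pro, body, epi;
       ix86_finish_cost (data, &pro, &body, &epi);
       ix86_destroy_cost_data (data);   */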

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}

/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
                                      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
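
/* Example of the target-specific bits this validates (user code built
   with -mhle, as in the GCC documentation):

       while (__atomic_exchange_n (&lock, 1,
                                   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
         ;
       __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a weaker model than ACQUIRE is diagnosed
   above and demoted to MEMMODEL_SEQ_CST.  */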

/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                             struct cgraph_simd_clone *clonei,
                                             tree base_type, int num)
{
  int ret = 1;

  if (clonei->simdlen
      && (clonei->simdlen < 2
          || clonei->simdlen > 16
          || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                  "unsupported simdlen %d", clonei->simdlen);
      return 0;
    }

  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case QImode:
      case HImode:
      case SImode:
      case DImode:
      case SFmode:
      case DFmode:
        break;
      default:
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported return type %qT for simd\n", ret_type);
        return 0;
      }

  tree t;
  int i;

  for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
    switch (TYPE_MODE (TREE_TYPE (t)))
      {
      case QImode:
      case HImode:
      case SImode:
      case DImode:
      case SFmode:
      case DFmode:
        break;
      default:
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported argument type %qT for simd\n", TREE_TYPE (t));
        return 0;
      }

  if (clonei->cilk_elemental)
    {
      /* Parse here processor clause.  If not present, default to 'b'.  */
      clonei->vecsize_mangle = 'b';
    }
  else if (!TREE_PUBLIC (node->decl))
    {
      /* If the function isn't exported, we can pick up just one ISA
         for the clones.  */
      if (TARGET_AVX2)
        clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
        clonei->vecsize_mangle = 'c';
      else
        clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      clonei->vecsize_mangle = "bcd"[num];
      ret = 3;
    }
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    }
  if (clonei->simdlen == 0)
    {
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
        clonei->simdlen = clonei->vecsize_int;
      else
        clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
      if (clonei->simdlen > 16)
        clonei->simdlen = 16;
    }
  return ret;
}
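
/* For example, a public 'declare simd' function with a float BASE_TYPE
   and no explicit simdlen gets three clones ('b', 'c', 'd'); for the
   'c' variant vecsize_float is 256, so simdlen becomes 256 / 32 = 8.  */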

/* Add target attribute to SIMD clone NODE if needed.  */

static void
ix86_simd_clone_adjust (struct cgraph_node *node)
{
  const char *str = NULL;
  gcc_assert (node->decl == cfun->decl);
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      if (!TARGET_SSE2)
        str = "sse2";
      break;
    case 'c':
      if (!TARGET_AVX)
        str = "avx";
      break;
    case 'd':
      if (!TARGET_AVX2)
        str = "avx2";
      break;
    default:
      gcc_unreachable ();
    }
  if (str == NULL)
    return;
  push_cfun (NULL);
  tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
  bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
  gcc_assert (ok);
  pop_cfun ();
  ix86_previous_fndecl = NULL_TREE;
  ix86_set_current_function (node->decl);
}

/* If SIMD clone NODE can't be used in a vectorized loop
   in current function, return -1, otherwise return a badness of using it
   (0 if it is most desirable from vecsize_mangle point of view, 1
   slightly less desirable, etc.).  */

static int
ix86_simd_clone_usable (struct cgraph_node *node)
{
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      if (!TARGET_SSE2)
        return -1;
      if (!TARGET_AVX)
        return 0;
      return TARGET_AVX2 ? 2 : 1;
    case 'c':
      if (!TARGET_AVX)
        return -1;
      return TARGET_AVX2 ? 1 : 0;
    case 'd':
      if (!TARGET_AVX2)
        return -1;
      return 0;
    default:
      gcc_unreachable ();
    }
}
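
/* E.g. when the current function is compiled for AVX2, all three clones
   are usable and the 256-bit 'd' clone is preferred: 'd' returns 0,
   'c' returns 1 and 'b' returns 2.  */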

/* This function counts the number of memory references.
   This value determines the unrolling factor for
   bdver3 and bdver4 architectures.  */

static int
ix86_loop_memcount (rtx *x, unsigned *mem_count)
{
  if (*x != NULL_RTX && MEM_P (*x))
    {
      enum machine_mode mode;
      unsigned int n_words;

      mode = GET_MODE (*x);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;

      if (n_words > 4)
        (*mem_count) += 2;
      else
        (*mem_count) += 1;
    }
  return 0;
}

/* This function adjusts the unroll factor based on
   the hardware capabilities.  For example, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important.  This function decides the
   unroll factor using the number of memory references
   (value 32 is used) as a heuristic.  */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (!TARGET_ADJUST_UNROLL)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]);
           insn = NEXT_INSN (insn))
        if (NONDEBUG_INSN_P (insn))
          for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
                                &mem_count);
    }
  free (bbs);

  if (mem_count && mem_count <= 32)
    return 32 / mem_count;

  return nunroll;
}
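
/* Worked example: a loop body with 4 counted memory references yields
   32 / 4 = 8, capping the unroll factor at 8; with more than 32
   references (or none), the requested NUNROLL is left unchanged.  */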

/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || TARGET_SSE_MATH;
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_80387 && !TARGET_SSE_MATH)
    return;
  tree exceptions_var = create_tmp_var (integer_type_node, NULL);
  if (TARGET_80387)
    {
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var (fenv_type, NULL);
      mark_addressable (fenv_var);
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
      tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
      tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
      tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
                      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
                            sw_var, fnstsw_call);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
                        sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE_MATH)
    {
      tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
      tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
      tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
      tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
                                      mxcsr_orig_var, stmxcsr_hold_call);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
                                  mxcsr_orig_var,
                                  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
                             build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
                                     mxcsr_mod_var, hold_mod_val);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
                              hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
                         ldmxcsr_hold_call);
      if (*hold)
        *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
        *hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
        *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
                         ldmxcsr_clear_call);
      else
        *clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
                                          stxmcsr_update_call);
      if (*update)
        {
          tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
                                        exceptions_var, exceptions_sse);
          tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
                                           exceptions_var, exceptions_mod);
          *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
                            exceptions_assign);
        }
      else
        *update = build2 (MODIFY_EXPR, integer_type_node,
                          exceptions_var, exceptions_sse);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        ldmxcsr_update_call);
    }
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
                                                    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                    atomic_feraiseexcept_call);
}

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"